Merge branch 'dev/fst-followup' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Chris Mason <clm@fb.com>

Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)

committer Chris Mason <clm@fb.com>

Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)
author Chris Mason <clm@fb.com>
Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)
committer Chris Mason <clm@fb.com>
Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c

index 88d9af3..5fb60ea 100644 (file)
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                 list_add_tail(&work->ordered_list, &wq->ordered_list);
                 spin_unlock_irqrestore(&wq->list_lock, flags);
         }
-       queue_work(wq->normal_wq, &work->normal_work);
         trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
  }
  
  void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c

index 08405a3..b90cd37 100644 (file)
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -560,13 +560,13 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
   */
  static void __merge_refs(struct list_head *head, int mode)
  {
-       struct __prelim_ref *ref1;
+       struct __prelim_ref *pos1;
  
-       list_for_each_entry(ref1, head, list) {
-               struct __prelim_ref *ref2 = ref1, *tmp;
+       list_for_each_entry(pos1, head, list) {
+               struct __prelim_ref *pos2 = pos1, *tmp;
  
-               list_for_each_entry_safe_continue(ref2, tmp, head, list) {
-                       struct __prelim_ref *xchg;
+               list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+                       struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
                         struct extent_inode_elem *eie;
  
                         if (!ref_for_same_block(ref1, ref2))
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index c5f40dc..a949664 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1614,7 +1614,7 @@ struct btrfs_fs_info {
  
         spinlock_t delayed_iput_lock;
         struct list_head delayed_iputs;
-       struct rw_semaphore delayed_iput_sem;
+       struct mutex cleaner_delayed_iput_mutex;
  
         /* this protects tree_mod_seq_list */
         spinlock_t tree_mod_seq_lock;
@@ -3641,6 +3641,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
  int __get_raid_index(u64 flags);
  int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
  void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
  void check_system_chunk(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         const u64 type);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c

index 1e668fb..cbb7dbf 100644 (file)
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
                 em = lookup_extent_mapping(em_tree, start, (u64)-1);
                 if (!em)
                         break;
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                 for (i = 0; i < map->num_stripes; i++)
                         if (srcdev == map->stripes[i].dev)
                                 map->stripes[i].dev = tgtdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index ca78bf2..50bed6c 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,6 +55,12 @@
  #include <asm/cpufeature.h>
  #endif
  
+#define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN |\
+                                BTRFS_HEADER_FLAG_RELOC |\
+                                BTRFS_SUPER_FLAG_ERROR |\
+                                BTRFS_SUPER_FLAG_SEEDING |\
+                                BTRFS_SUPER_FLAG_METADUMP) /* mask of recognized super flags; bits outside it only trigger a warning in btrfs_check_super_valid() */
+
  static const struct extent_io_ops btree_extent_io_ops;
  static void end_workqueue_fn(struct btrfs_work *work);
  static void free_fs_root(struct btrfs_root *root);
@@ -1584,8 +1590,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
         ret = get_anon_bdev(&root->anon_dev);
         if (ret)
                 goto free_writers;
+
+       mutex_lock(&root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(root,
+                                       &root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&root->objectid_mutex);
+               goto free_root_dev;
+       }
+
+       ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&root->objectid_mutex);
+
         return 0;
  
+free_root_dev:
+       free_anon_bdev(root->anon_dev);
  free_writers:
         btrfs_free_subvolume_writers(root->subv_writers);
  fail:
@@ -1767,7 +1788,6 @@ static int cleaner_kthread(void *arg)
         int again;
         struct btrfs_trans_handle *trans;
  
-       set_freezable();
         do {
                 again = 0;
  
@@ -1787,7 +1807,10 @@ static int cleaner_kthread(void *arg)
                         goto sleep;
                 }
  
+               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
                 btrfs_run_delayed_iputs(root);
+               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
                 again = btrfs_clean_one_deleted_snapshot(root);
                 mutex_unlock(&root->fs_info->cleaner_mutex);
  
@@ -2557,8 +2580,8 @@ int open_ctree(struct super_block *sb,
         mutex_init(&fs_info->delete_unused_bgs_mutex);
         mutex_init(&fs_info->reloc_mutex);
         mutex_init(&fs_info->delalloc_root_mutex);
+       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
         seqlock_init(&fs_info->profiles_lock);
-       init_rwsem(&fs_info->delayed_iput_sem);
  
         INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
         INIT_LIST_HEAD(&fs_info->space_info);
@@ -2743,26 +2766,6 @@ int open_ctree(struct super_block *sb,
                 goto fail_alloc;
         }
  
-       /*
-        * Leafsize and nodesize were always equal, this is only a sanity check.
-        */
-       if (le32_to_cpu(disk_super->__unused_leafsize) !=
-           btrfs_super_nodesize(disk_super)) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksizes don't match.  node %d leaf %d\n",
-                      btrfs_super_nodesize(disk_super),
-                      le32_to_cpu(disk_super->__unused_leafsize));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-       if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: couldn't mount because metadata "
-                      "blocksize (%d) was too large\n",
-                      btrfs_super_nodesize(disk_super));
-               err = -EINVAL;
-               goto fail_alloc;
-       }
-
         features = btrfs_super_incompat_flags(disk_super);
         features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
         if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2834,17 +2837,6 @@ int open_ctree(struct super_block *sb,
         sb->s_blocksize = sectorsize;
         sb->s_blocksize_bits = blksize_bits(sectorsize);
  
-       if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
-               goto fail_sb_buffer;
-       }
-
-       if (sectorsize != PAGE_SIZE) {
-               printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
-                      "found on %s\n", (unsigned long)sectorsize, sb->s_id);
-               goto fail_sb_buffer;
-       }
-
         mutex_lock(&fs_info->chunk_mutex);
         ret = btrfs_read_sys_array(tree_root);
         mutex_unlock(&fs_info->chunk_mutex);
@@ -2916,6 +2908,18 @@ retry_root_backup:
         tree_root->commit_root = btrfs_root_node(tree_root);
         btrfs_set_root_refs(&tree_root->root_item, 1);
  
+       mutex_lock(&tree_root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(tree_root,
+                                       &tree_root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&tree_root->objectid_mutex);
+               goto recovery_tree_root;
+       }
+
+       ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&tree_root->objectid_mutex);
+
         ret = btrfs_read_roots(fs_info, tree_root);
         if (ret)
                 goto recovery_tree_root;
@@ -4019,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                               int read_only)
  {
         struct btrfs_super_block *sb = fs_info->super_copy;
+       u64 nodesize = btrfs_super_nodesize(sb);
+       u64 sectorsize = btrfs_super_sectorsize(sb);
         int ret = 0;
  
+       if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+               printk(KERN_ERR "BTRFS: no valid FS found\n");
+               ret = -EINVAL;
+       }
+       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+               printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+                               btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
         if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
                 printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
                                 btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4038,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         }
  
         /*
-        * The common minimum, we don't know if we can trust the nodesize/sectorsize
-        * items yet, they'll be verified later. Issue just a warning.
+        * Check sectorsize and nodesize first, other checks will need them.
+        * Check all possible sectorsizes (4K, 8K, 16K, 32K, 64K) here.
          */
-       if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+       if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+           sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+               ret = -EINVAL;
+       }
+       /* Only a sectorsize equal to PAGE_CACHE_SIZE is supported for now */
+       if (sectorsize != PAGE_CACHE_SIZE) {
+               printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+                               sectorsize, PAGE_CACHE_SIZE);
+               ret = -EINVAL;
+       }
+       if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+           nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+               printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+               ret = -EINVAL;
+       }
+       if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+               printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+                               le32_to_cpu(sb->__unused_leafsize),
+                               nodesize);
+               ret = -EINVAL;
+       }
+
+       /* Root alignment check */
+       if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
                 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
                                 btrfs_super_root(sb));
-       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+               ret = -EINVAL;
+       }
+       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
                 printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
                                 btrfs_super_chunk_root(sb));
-       if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
-               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
-                               btrfs_super_log_root(sb));
-
-       /*
-        * Check the lower bound, the alignment and other constraints are
-        * checked later.
-        */
-       if (btrfs_super_nodesize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
-                               btrfs_super_nodesize(sb));
                 ret = -EINVAL;
         }
-       if (btrfs_super_sectorsize(sb) < 4096) {
-               printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
-                               btrfs_super_sectorsize(sb));
+       if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               btrfs_super_log_root(sb));
                 ret = -EINVAL;
         }
  
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 60cc139..e2287c7 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4139,8 +4139,10 @@ commit_trans:
                     !atomic_read(&root->fs_info->open_ioctl_trans)) {
                         need_commit--;
  
-                       if (need_commit > 0)
+                       if (need_commit > 0) {
+                               btrfs_start_delalloc_roots(fs_info, 0, -1);
                                 btrfs_wait_ordered_roots(fs_info, -1);
+                       }
  
                         trans = btrfs_join_transaction(root);
                         if (IS_ERR(trans))
@@ -4153,11 +4155,12 @@ commit_trans:
                                 if (ret)
                                         return ret;
                                 /*
-                                * make sure that all running delayed iput are
-                                * done
+                                * The cleaner kthread might still be doing iput
+                                * operations. Wait for it to finish so that
+                                * more space is released.
                                  */
-                               down_write(&root->fs_info->delayed_iput_sem);
-                               up_write(&root->fs_info->delayed_iput_sem);
+                               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+                               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
                                 goto again;
                         } else {
                                 btrfs_end_transaction(trans, root);
@@ -10399,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
          * more device items and remove one chunk item), but this is done at
          * btrfs_remove_chunk() through a call to check_system_chunk().
          */
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         num_items = 3 + map->num_stripes;
         free_extent_map(em);
  
@@ -10586,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
  
         disk_super = fs_info->super_copy;
         if (!btrfs_super_root(disk_super))
-               return 1;
+               return -EINVAL;
  
         features = btrfs_super_incompat_flags(disk_super);
         if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10816,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
         }
         return 1;
  }
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+       schedule();     /* action passed to wait_on_atomic_t(): just yield; 'a' is not read here */
+       return 0;       /* always 0 - presumably "keep waiting, no abort"; confirm against wait_on_atomic_t() */
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+       while (true) {  /* loop until btrfs_start_write_no_snapshoting() succeeds */
+               int ret;
+
+               ret = btrfs_start_write_no_snapshoting(root);
+               if (ret)        /* non-zero: started; the caller in btrfs_setsize() later calls btrfs_end_write_no_snapshoting() */
+                       break;
+               wait_on_atomic_t(&root->will_be_snapshoted,     /* otherwise sleep on this counter, then retry */
+                                wait_snapshoting_atomic_t,
+                                TASK_UNINTERRUPTIBLE);
+       }
+}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c

index 6a98bdd..84fb56d 100644 (file)
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
                 WARN_ON(extent_map_in_tree(em));
                 WARN_ON(!list_empty(&em->list));
                 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
-                       kfree(em->bdev);
+                       kfree(em->map_lookup);
                 kmem_cache_free(extent_map_cache, em);
         }
  }
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h

index b2991fd..eb8b8fa 100644 (file)
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -32,7 +32,15 @@ struct extent_map {
         u64 block_len;
         u64 generation;
         unsigned long flags;
-       struct block_device *bdev;
+       union {
+               struct block_device *bdev;
+
+               /*
+                * used for chunk mappings
+                * flags & EXTENT_FLAG_FS_MAPPING must be set
+                */
+               struct map_lookup *map_lookup;
+       };
         atomic_t refs;
         unsigned int compress_type;
         struct list_head list;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index 364e0f1..af782fd 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
  /* simple helper to fault in pages and copy.  This should go away
   * and be replaced with calls into generic code.
   */
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                        size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
                                          struct page **prepared_pages,
                                          struct iov_iter *i)
  {
@@ -1588,8 +1587,7 @@ again:
                         ret = 0;
                 }
  
-               copied = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, i);
+               copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
  
                 /*
                  * if we have trouble faulting in the pages, fall
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c

index 0f33d58..dfa8124 100644 (file)
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -23,6 +23,7 @@
  #include "locking.h"
  #include "free-space-tree.h"
  #include "transaction.h"
+#include "sysfs.h"
  
  static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info,
@@ -1183,6 +1184,9 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
         }
  
         btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       btrfs_sysfs_feature_update(fs_info,
+               BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
+
         fs_info->creating_free_space_tree = 0;
  
         ret = btrfs_commit_transaction(trans, tree_root);
@@ -1251,6 +1255,9 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
                 return PTR_ERR(trans);
  
         btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       btrfs_sysfs_feature_update(fs_info,
+               BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO);
+
         fs_info->free_space_root = NULL;
  
         ret = clear_free_space_tree(trans, free_space_root);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c

index 8b57c17..e50316c 100644 (file)
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -515,7 +515,7 @@ out:
         return ret;
  }
  
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
  {
         struct btrfs_path *path;
         int ret;
@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
         int ret;
         mutex_lock(&root->objectid_mutex);
  
-       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
-               ret = btrfs_find_highest_objectid(root,
-                                                 &root->highest_objectid);
-               if (ret)
-                       goto out;
-       }
-
         if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                 ret = -ENOSPC;
                 goto out;
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h

index ddb347b..c8e864b 100644 (file)
--- a/fs/btrfs/inode-map.h
+++ b/fs/btrfs/inode-map.h
@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
                          struct btrfs_trans_handle *trans);
  
  int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
  
  #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 85afe66..e456545 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3134,7 +3134,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
  {
         struct btrfs_fs_info *fs_info = root->fs_info;
  
-       down_read(&fs_info->delayed_iput_sem);
         spin_lock(&fs_info->delayed_iput_lock);
         while (!list_empty(&fs_info->delayed_iputs)) {
                 struct btrfs_inode *inode;
@@ -3153,7 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                 spin_lock(&fs_info->delayed_iput_lock);
         }
         spin_unlock(&fs_info->delayed_iput_lock);
-       up_read(&root->fs_info->delayed_iput_sem);
  }
  
  /*
@@ -4873,26 +4871,6 @@ next:
         return err;
  }
  
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
-       schedule();
-       return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
-       while (true) {
-               int ret;
-
-               ret = btrfs_start_write_no_snapshoting(root);
-               if (ret)
-                       break;
-               wait_on_atomic_t(&root->will_be_snapshoted,
-                                wait_snapshoting_atomic_t,
-                                TASK_UNINTERRUPTIBLE);
-       }
-}
-
  static int btrfs_setsize(struct inode *inode, struct iattr *attr)
  {
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4924,7 +4902,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                  * truncation, it must capture all writes that happened before
                  * this truncation.
                  */
-               wait_for_snapshot_creation(root);
+               btrfs_wait_for_snapshot_creation(root);
                 ret = btrfs_cont_expand(inode, oldsize, newsize);
                 if (ret) {
                         btrfs_end_write_no_snapshoting(root);
@@ -7137,21 +7115,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
         if (ret)
                 return ERR_PTR(ret);
  
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply can not rely on inode_dio_wait() because it
+        * causes deadlock with AIO.
+        */
         ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                            ins.offset, ins.offset, 0);
         if (ret) {
                 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                 return ERR_PTR(ret);
         }
  
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
         return em;
  }
  
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index e392dd6..1568f57 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir,
                 goto fail;
         }
  
+       mutex_lock(&new_root->objectid_mutex);
+       new_root->highest_objectid = new_dirid;
+       mutex_unlock(&new_root->objectid_mutex);
+
         /*
          * insert the directory item
          */
@@ -1455,6 +1459,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
  
         if (range->compress_type == BTRFS_COMPRESS_LZO) {
                 btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
+               btrfs_sysfs_feature_update(root->fs_info,
+                       BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, FEAT_INCOMPAT);
         }
  
         ret = defrag_count;
@@ -4063,6 +4069,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
         btrfs_free_path(path);
  
         btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
+       btrfs_sysfs_feature_update(root->fs_info,
+               BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL, FEAT_INCOMPAT);
         btrfs_end_transaction(trans, root);
  out:
         mnt_drop_write_file(file);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c

index 6d70754..5516136 100644 (file)
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -609,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
         return 1;
  }
  
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+                                 int index)
+{
+       return stripe * rbio->stripe_npages + index;    /* flat slot in rbio->stripe_pages[] for page 'index' of stripe 'stripe' */
+}
+
+/*
+ * Return page 'index' of stripe 'stripe' from the rbio's own stripe_pages
+ * array; these are not the pages the FS sent down to us.
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+                                    int index)
+{
+       return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
  /*
   * helper to index into the pstripe
   */
  static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
  {
-       index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data, index);
  }
  
  /*
@@ -626,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
  {
         if (rbio->nr_data + 1 == rbio->real_stripes)
                 return NULL;
-
-       index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
-               PAGE_CACHE_SHIFT;
-       return rbio->stripe_pages[index];
+       return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
  }
  
  /*
@@ -889,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
  {
         struct btrfs_raid_bio *rbio = bio->bi_private;
         int err = bio->bi_error;
+       int max_errors;
  
         if (err)
                 fail_bio_stripe(rbio, bio);
@@ -901,7 +914,9 @@ static void raid_write_end_io(struct bio *bio)
         err = 0;
  
         /* OK, we have read all the stripes we need to. */
-       if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+       max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+                    0 : rbio->bbio->max_errors;
+       if (atomic_read(&rbio->error) > max_errors)
                 err = -EIO;
  
         rbio_orig_end_io(rbio, err);
@@ -947,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
   */
  static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
  {
-       unsigned long nr = stripe_len * nr_stripes;
-       return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+       return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes; /* round each stripe up to whole pages, then multiply */
  }
  
  /*
@@ -966,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
         void *p;
  
         rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
-                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
-                       GFP_NOFS);
+                      DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+                      sizeof(long), GFP_NOFS);
         if (!rbio)
                 return ERR_PTR(-ENOMEM);
  
@@ -1021,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
                 if (!page)
                         return -ENOMEM;
                 rbio->stripe_pages[i] = page;
-               ClearPageUptodate(page);
         }
         return 0;
  }
  
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
  static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
  {
         int i;
         struct page *page;
  
-       i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+       i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
  
         for (; i < rbio->nr_pages; i++) {
                 if (rbio->stripe_pages[i])
@@ -1120,18 +1133,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
         }
  }
  
-/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
-       int index;
-       index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
-       index += page;
-       return rbio->stripe_pages[index];
-}
-
  /*
   * helper function to walk our bio list and populate the bio_pages array with
   * the result.  This seems expensive, but it is faster than constantly
@@ -1175,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
  {
         struct btrfs_bio *bbio = rbio->bbio;
         void *pointers[rbio->real_stripes];
-       int stripe_len = rbio->stripe_len;
         int nr_data = rbio->nr_data;
         int stripe;
         int pagenr;
@@ -1183,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
         int q_stripe = -1;
         struct bio_list bio_list;
         struct bio *bio;
-       int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
         int ret;
  
         bio_list_init(&bio_list);
@@ -1226,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
         else
                 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
  
-       for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                 struct page *p;
                 /* first collect one page from each data stripe */
                 for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1268,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
          * everything else.
          */
         for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                         struct page *page;
                         if (stripe < rbio->nr_data) {
                                 page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1292,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
                 if (!bbio->tgtdev_map[stripe])
                         continue;
  
-               for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                         struct page *page;
                         if (stripe < rbio->nr_data) {
                                 page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1506,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
         int bios_to_read = 0;
         struct bio_list bio_list;
         int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
         int pagenr;
         int stripe;
         struct bio *bio;
@@ -1525,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
          * stripe
          */
         for (stripe = 0; stripe < rbio->nr_data; stripe++) {
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                         struct page *page;
                         /*
                          * we want to find all the pages missing from
@@ -1801,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
         int pagenr, stripe;
         void **pointers;
         int faila = -1, failb = -1;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
         struct page *page;
         int err;
         int i;
@@ -1824,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
  
         index_rbio_pages(rbio);
  
-       for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+       for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                 /*
                  * Now we just use bitmap to mark the horizontal stripes in
                  * which we have data when doing parity scrub.
@@ -1935,7 +1932,7 @@ pstripe:
                  * other endio functions will fiddle the uptodate bits
                  */
                 if (rbio->operation == BTRFS_RBIO_WRITE) {
-                       for (i = 0;  i < nr_pages; i++) {
+                       for (i = 0;  i < rbio->stripe_npages; i++) {
                                 if (faila != -1) {
                                         page = rbio_stripe_page(rbio, faila, i);
                                         SetPageUptodate(page);
@@ -2031,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
         int bios_to_read = 0;
         struct bio_list bio_list;
         int ret;
-       int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
         int pagenr;
         int stripe;
         struct bio *bio;
@@ -2055,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
                         continue;
                 }
  
-               for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+               for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                         struct page *p;
  
                         /*
@@ -2279,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
                         if (!page)
                                 return -ENOMEM;
                         rbio->stripe_pages[index] = page;
-                       ClearPageUptodate(page);
                 }
         }
         return 0;
  }
  
-/*
- * end io function used by finish_rmw.  When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
-       struct btrfs_raid_bio *rbio = bio->bi_private;
-       int err = bio->bi_error;
-
-       if (bio->bi_error)
-               fail_bio_stripe(rbio, bio);
-
-       bio_put(bio);
-
-       if (!atomic_dec_and_test(&rbio->stripes_pending))
-               return;
-
-       err = 0;
-
-       if (atomic_read(&rbio->error))
-               err = -EIO;
-
-       rbio_orig_end_io(rbio, err);
-}
-
  static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                                          int need_check)
  {
@@ -2462,7 +2432,7 @@ submit_write:
                         break;
  
                 bio->bi_private = rbio;
-               bio->bi_end_io = raid_write_parity_end_io;
+               bio->bi_end_io = raid_write_end_io;
                 submit_bio(WRITE, bio);
         }
         return;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index 0c981eb..b1a6853 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2813,7 +2813,7 @@ out:
  
  static inline int scrub_calc_parity_bitmap_len(int nsectors)
  {
-       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
  }
  
  static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3458,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
                 return ret;
         }
  
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         if (em->start != chunk_offset)
                 goto out;
  
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index 86f7fdc..49b4241 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -58,6 +58,7 @@
  #include "dev-replace.h"
  #include "free-space-cache.h"
  #include "backref.h"
+#include "sysfs.h"
  #include "tests/btrfs-tests.h"
  
  #include "qgroup.h"
@@ -383,6 +384,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
         int ret = 0;
         char *compress_type;
         bool compress_force = false;
+       enum btrfs_compression_type saved_compress_type;
+       bool saved_compress_force;
+       int no_compress = 0;
  
         cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
         if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
@@ -462,6 +466,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                         /* Fallthrough */
                 case Opt_compress:
                 case Opt_compress_type:
+                       saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+                               info->compress_type : BTRFS_COMPRESS_NONE;
+                       saved_compress_force =
+                               btrfs_test_opt(root, FORCE_COMPRESS);
                         if (token == Opt_compress ||
                             token == Opt_compress_force ||
                             strcmp(args[0].from, "zlib") == 0) {
@@ -470,6 +478,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                 btrfs_set_opt(info->mount_opt, COMPRESS);
                                 btrfs_clear_opt(info->mount_opt, NODATACOW);
                                 btrfs_clear_opt(info->mount_opt, NODATASUM);
+                               no_compress = 0;
                         } else if (strcmp(args[0].from, "lzo") == 0) {
                                 compress_type = "lzo";
                                 info->compress_type = BTRFS_COMPRESS_LZO;
@@ -477,25 +486,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                 btrfs_clear_opt(info->mount_opt, NODATACOW);
                                 btrfs_clear_opt(info->mount_opt, NODATASUM);
                                 btrfs_set_fs_incompat(info, COMPRESS_LZO);
+                               btrfs_sysfs_feature_update(root->fs_info,
+                                       BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO,
+                                       FEAT_INCOMPAT);
+                               no_compress = 0;
                         } else if (strncmp(args[0].from, "no", 2) == 0) {
                                 compress_type = "no";
                                 btrfs_clear_opt(info->mount_opt, COMPRESS);
                                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                                 compress_force = false;
+                               no_compress++;
                         } else {
                                 ret = -EINVAL;
                                 goto out;
                         }
  
                         if (compress_force) {
-                               btrfs_set_and_info(root, FORCE_COMPRESS,
-                                                  "force %s compression",
-                                                  compress_type);
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
                         } else {
-                               if (!btrfs_test_opt(root, COMPRESS))
-                                       btrfs_info(root->fs_info,
-                                                  "btrfs: use %s compression",
-                                                  compress_type);
                                 /*
                                  * If we remount from compress-force=xxx to
                                  * compress=xxx, we need clear FORCE_COMPRESS
@@ -504,6 +512,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                  */
                                 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                         }
+                       if ((btrfs_test_opt(root, COMPRESS) &&
+                            (info->compress_type != saved_compress_type ||
+                             compress_force != saved_compress_force)) ||
+                           (!btrfs_test_opt(root, COMPRESS) &&
+                            no_compress == 1)) {
+                               btrfs_info(root->fs_info,
+                                          "%s %s compression",
+                                          (compress_force) ? "force" : "use",
+                                          compress_type);
+                       }
+                       compress_force = false;
                         break;
                 case Opt_ssd:
                         btrfs_set_and_info(root, SSD,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c

index e0ac859..539e7b5 100644 (file)
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
  BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
  BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
  BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
  
  static struct attribute *btrfs_supported_feature_attrs[] = {
         BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
         BTRFS_FEAT_ATTR_PTR(raid56),
         BTRFS_FEAT_ATTR_PTR(skinny_metadata),
         BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
         NULL
  };
  
@@ -780,6 +782,39 @@ failure:
         return error;
  }
  
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value, ideally we'd like
+        * to use sysfs_update_group but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
  static int btrfs_init_debugfs(void)
  {
  #ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h

index 9c09522..d7da1a4 100644 (file)
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
  #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
         BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
  #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
  #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
         BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
  
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                 struct kobject *parent);
  int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
  void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
  #endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 323e12c..978c3a8 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                      struct inode *inode,
                                      struct btrfs_path *path,
                                      struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
  {
         struct extent_map *em, *n;
         struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
         }
  
         list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items without waiting for the disk
+        * location they point to being written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
  process:
         while (!list_empty(&extents)) {
                 em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                         goto out_unlock;
                 }
                 ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                 if (ret) {
                         err = ret;
                         goto out_unlock;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index c32abbc..5b505f6 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
         },
  };
  
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
         [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
         [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
         [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
@@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void)
         spin_lock_init(&dev->reada_lock);
         atomic_set(&dev->reada_in_flight, 0);
         atomic_set(&dev->dev_stats_ccnt, 0);
+       btrfs_device_data_ordered_init(dev);
         INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
         INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
  
@@ -1183,7 +1184,7 @@ again:
                 struct map_lookup *map;
                 int i;
  
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
                 for (i = 0; i < map->num_stripes; i++) {
                         u64 end;
  
@@ -2755,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                         free_extent_map(em);
                 return -EINVAL;
         }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         lock_chunks(root->fs_info->chunk_root);
         check_system_chunk(trans, extent_root, map->type);
         unlock_chunks(root->fs_info->chunk_root);
@@ -3751,7 +3752,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
         if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
                 btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
                 btrfs_warn(fs_info,
-       "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+       "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
                         bctl->meta.target, bctl->data.target);
         }
  
@@ -4468,6 +4469,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
                 return;
  
         btrfs_set_fs_incompat(info, RAID56);
+       btrfs_sysfs_feature_update(info, BTRFS_FEATURE_INCOMPAT_RAID56,
+               FEAT_INCOMPAT);
  }
  
  #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)            \
@@ -4718,7 +4721,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                 goto error;
         }
         set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
         em->start = start;
         em->len = num_bytes;
         em->block_start = 0;
@@ -4813,7 +4816,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                 return -EINVAL;
         }
  
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         item_size = btrfs_chunk_item_size(map->num_stripes);
         stripe_size = em->orig_block_len;
  
@@ -4968,7 +4971,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
         if (!em)
                 return 1;
  
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         for (i = 0; i < map->num_stripes; i++) {
                 if (map->stripes[i].dev->missing) {
                         miss_ndevs++;
@@ -5048,7 +5051,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                 return 1;
         }
  
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
                 ret = map->num_stripes;
         else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5084,7 +5087,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
         BUG_ON(!em);
  
         BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                 len = map->stripe_len * nr_data_stripes(map);
         free_extent_map(em);
@@ -5105,7 +5108,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
         BUG_ON(!em);
  
         BUG_ON(em->start > logical || em->start + em->len < logical);
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                 ret = 1;
         free_extent_map(em);
@@ -5264,7 +5267,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 return -EINVAL;
         }
  
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
         offset = logical - em->start;
  
         stripe_len = map->stripe_len;
@@ -5378,35 +5381,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                  * target drive.
                  */
                 for (i = 0; i < tmp_num_stripes; i++) {
-                       if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it
-                                * simple, only add the mirror with the
-                                * lowest physical address
-                                */
-                               if (found &&
-                                   physical_of_found <=
-                                    tmp_bbio->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found =
-                                       tmp_bbio->stripes[i].physical;
-                       }
+                       if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+                               continue;
+
+                       /*
+                        * In case of DUP, in order to keep it simple, only add
+                        * the mirror with the lowest physical address
+                        */
+                       if (found &&
+                           physical_of_found <= tmp_bbio->stripes[i].physical)
+                               continue;
+
+                       index_srcdev = i;
+                       found = 1;
+                       physical_of_found = tmp_bbio->stripes[i].physical;
                 }
  
-               if (found) {
-                       mirror_num = index_srcdev + 1;
-                       patch_the_first_stripe_for_dev_replace = 1;
-                       physical_to_patch_in_first_stripe = physical_of_found;
-               } else {
+               btrfs_put_bbio(tmp_bbio);
+
+               if (!found) {
                         WARN_ON(1);
                         ret = -EIO;
-                       btrfs_put_bbio(tmp_bbio);
                         goto out;
                 }
  
-               btrfs_put_bbio(tmp_bbio);
+               mirror_num = index_srcdev + 1;
+               patch_the_first_stripe_for_dev_replace = 1;
+               physical_to_patch_in_first_stripe = physical_of_found;
         } else if (mirror_num > map->num_stripes) {
                 mirror_num = 0;
         }
@@ -5806,7 +5807,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                 free_extent_map(em);
                 return -EIO;
         }
-       map = (struct map_lookup *)em->bdev;
+       map = em->map_lookup;
  
         length = em->len;
         rmap_len = map->stripe_len;
@@ -6069,7 +6070,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
         bbio->fs_info = root->fs_info;
         atomic_set(&bbio->stripes_pending, bbio->num_stripes);
  
-       if (bbio->raid_map) {
+       if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+           ((rw & WRITE) || (mirror_num > 1))) {
                 /* In this case, map_length has been set to the length of
                    a single stripe; not the whole write */
                 if (rw & WRITE) {
@@ -6210,6 +6212,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
         struct extent_map *em;
         u64 logical;
         u64 length;
+       u64 stripe_len;
         u64 devid;
         u8 uuid[BTRFS_UUID_SIZE];
         int num_stripes;
@@ -6218,6 +6221,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
  
         logical = key->offset;
         length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       /* Validation check */
+       if (!num_stripes) {
+               btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+                         num_stripes);
+               return -EIO;
+       }
+       if (!IS_ALIGNED(logical, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                         "invalid chunk logical %llu", logical);
+               return -EIO;
+       }
+       if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+               btrfs_err(root->fs_info,
+                       "invalid chunk length %llu", length);
+               return -EIO;
+       }
+       if (!is_power_of_2(stripe_len)) {
+               btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+                         stripe_len);
+               return -EIO;
+       }
+       if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+           btrfs_chunk_type(leaf, chunk)) {
+               btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+                         ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                           BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+                         btrfs_chunk_type(leaf, chunk));
+               return -EIO;
+       }
  
         read_lock(&map_tree->map_tree.lock);
         em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6234,7 +6268,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
         em = alloc_extent_map();
         if (!em)
                 return -ENOMEM;
-       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
         if (!map) {
                 free_extent_map(em);
@@ -6242,7 +6275,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
         }
  
         set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
-       em->bdev = (struct block_device *)map;
+       em->map_lookup = map;
         em->start = logical;
         em->len = length;
         em->orig_start = 0;
@@ -6944,7 +6977,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
         /* In order to kick the device replace finish process */
         lock_chunks(root);
         list_for_each_entry(em, &transaction->pending_chunks, list) {
-               map = (struct map_lookup *)em->bdev;
+               map = em->map_lookup;
  
                 for (i = 0; i < map->num_stripes; i++) {
                         dev = map->stripes[i].dev;
author	Chris Mason <clm@fb.com>
	Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)
committer	Chris Mason <clm@fb.com>
	Wed, 27 Jan 2016 13:48:23 +0000 (05:48 -0800)
fs/btrfs/async-thread.c		patch \| blob \| history
fs/btrfs/backref.c		patch \| blob \| history
fs/btrfs/ctree.h		patch \| blob \| history
fs/btrfs/dev-replace.c		patch \| blob \| history
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/extent_map.c		patch \| blob \| history
fs/btrfs/extent_map.h		patch \| blob \| history
fs/btrfs/file.c		patch \| blob \| history
fs/btrfs/free-space-tree.c		patch \| blob \| history
fs/btrfs/inode-map.c		patch \| blob \| history
fs/btrfs/inode-map.h		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/ioctl.c		patch \| blob \| history
fs/btrfs/raid56.c		patch \| blob \| history
fs/btrfs/scrub.c		patch \| blob \| history
fs/btrfs/super.c		patch \| blob \| history
fs/btrfs/sysfs.c		patch \| blob \| history
fs/btrfs/sysfs.h		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history
fs/btrfs/volumes.c		patch \| blob \| history