Btrfs: add sanity test for outstanding_extents accounting
[cascardo/linux.git] / fs / btrfs / inode.c
index 8bf326a..3717b3d 100644 (file)
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 static int btrfs_dirty_inode(struct inode *inode);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
                                     const struct qstr *qstr)
@@ -1530,10 +1537,32 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 static void btrfs_split_extent_hook(struct inode *inode,
                                    struct extent_state *orig, u64 split)
 {
+       u64 size;
+
        /* not delalloc, ignore it */
        if (!(orig->state & EXTENT_DELALLOC))
                return;
 
+       size = orig->end - orig->start + 1;
+       if (size > BTRFS_MAX_EXTENT_SIZE) {
+               u64 num_extents;
+               u64 new_size;
+
+               /*
+                * See the explanation in btrfs_merge_extent_hook, the same
+                * applies here, just in reverse.
+                */
+               new_size = orig->end - split + 1;
+               num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                       BTRFS_MAX_EXTENT_SIZE);
+               new_size = split - orig->start;
+               num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                       BTRFS_MAX_EXTENT_SIZE);
+               if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                             BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+                       return;
+       }
+
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents++;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -1549,10 +1578,55 @@ static void btrfs_merge_extent_hook(struct inode *inode,
                                    struct extent_state *new,
                                    struct extent_state *other)
 {
+       u64 new_size, old_size;
+       u64 num_extents;
+
        /* not delalloc, ignore it */
        if (!(other->state & EXTENT_DELALLOC))
                return;
 
+       if (new->start > other->start)
+               new_size = new->end - other->start + 1;
+       else
+               new_size = other->end - new->start + 1;
+
+       /* we're not bigger than the max, unreserve the space and go */
+       if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents--;
+               spin_unlock(&BTRFS_I(inode)->lock);
+               return;
+       }
+
+       /*
+        * We have to add up either side to figure out how many extents were
+        * accounted for before we merged into one big extent.  If the number of
+        * extents we accounted for is <= the amount we need for the new range
+        * then we can return, otherwise drop.  Think of it like this
+        *
+        * [ 4k][MAX_SIZE]
+        *
+        * So we've grown the extent by a MAX_SIZE extent, this would mean we
+        * need 2 outstanding extents, on one side we have 1 and the other side
+        * we have 1 so they are == and we can return.  But in this case
+        *
+        * [MAX_SIZE+4k][MAX_SIZE+4k]
+        *
+        * Each range on their own accounts for 2 extents, but merged together
+        * they are only 3 extents worth of accounting, so we need to drop in
+        * this case.
+        */
+       old_size = other->end - other->start + 1;
+       num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                               BTRFS_MAX_EXTENT_SIZE);
+       old_size = new->end - new->start + 1;
+       num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                BTRFS_MAX_EXTENT_SIZE);
+
+       if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                     BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+               return;
+
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents--;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -1604,7 +1678,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
  * have pending delalloc work to be done.
  */
 static void btrfs_set_bit_hook(struct inode *inode,
-                              struct extent_state *state, unsigned long *bits)
+                              struct extent_state *state, unsigned *bits)
 {
 
        if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@@ -1627,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
 
+               /* For sanity tests */
+               if (btrfs_test_is_dummy_root(root))
+                       return;
+
                __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
                                     root->fs_info->delalloc_batch);
                spin_lock(&BTRFS_I(inode)->lock);
@@ -1645,9 +1723,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
  */
 static void btrfs_clear_bit_hook(struct inode *inode,
                                 struct extent_state *state,
-                                unsigned long *bits)
+                                unsigned *bits)
 {
        u64 len = state->end + 1 - state->start;
+       u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
+                                   BTRFS_MAX_EXTENT_SIZE);
 
        spin_lock(&BTRFS_I(inode)->lock);
        if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1667,7 +1747,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                        *bits &= ~EXTENT_FIRST_DELALLOC;
                } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
                        spin_lock(&BTRFS_I(inode)->lock);
-                       BTRFS_I(inode)->outstanding_extents--;
+                       BTRFS_I(inode)->outstanding_extents -= num_extents;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
 
@@ -1680,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                    root != root->fs_info->tree_root)
                        btrfs_delalloc_release_metadata(inode, len);
 
+               /* For sanity tests. */
+               if (btrfs_test_is_dummy_root(root))
+                       return;
+
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
                    && do_list && !(state->state & EXTENT_NORESERVE))
                        btrfs_free_reserved_data_space(inode, len);
@@ -2945,7 +3029,7 @@ static int __readpage_endio_check(struct inode *inode,
        return 0;
 zeroit:
        if (__ratelimit(&_rs))
-               btrfs_info(BTRFS_I(inode)->root->fs_info,
+               btrfs_warn(BTRFS_I(inode)->root->fs_info,
                           "csum failed ino %llu off %llu csum %u expected csum %u",
                           btrfs_ino(inode), start, csum, csum_expected);
        memset(kaddr + pgoff, 1, len);
@@ -3407,7 +3491,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 
 out:
        if (ret)
-               btrfs_crit(root->fs_info,
+               btrfs_err(root->fs_info,
                        "could not do orphan cleanup %d", ret);
        btrfs_free_path(path);
        return ret;
@@ -3490,7 +3574,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_inode_item *inode_item;
-       struct btrfs_timespec *tspec;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
        unsigned long ptr;
@@ -3527,17 +3610,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
        i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
        btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
 
-       tspec = btrfs_inode_atime(inode_item);
-       inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-       inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+       inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
+       inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
+
+       inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
+       inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
 
-       tspec = btrfs_inode_mtime(inode_item);
-       inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-       inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+       inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
+       inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
 
-       tspec = btrfs_inode_ctime(inode_item);
-       inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
-       inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+       BTRFS_I(inode)->i_otime.tv_sec =
+               btrfs_timespec_sec(leaf, &inode_item->otime);
+       BTRFS_I(inode)->i_otime.tv_nsec =
+               btrfs_timespec_nsec(leaf, &inode_item->otime);
 
        inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
        BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@@ -3658,21 +3743,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
        btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->atime,
                                     inode->i_atime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->atime,
                                      inode->i_atime.tv_nsec, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->mtime,
                                     inode->i_mtime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->mtime,
                                      inode->i_mtime.tv_nsec, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->ctime,
                                     inode->i_ctime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->ctime,
                                      inode->i_ctime.tv_nsec, &token);
 
+       btrfs_set_token_timespec_sec(leaf, &item->otime,
+                                    BTRFS_I(inode)->i_otime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, &item->otime,
+                                     BTRFS_I(inode)->i_otime.tv_nsec, &token);
+
        btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
                                     &token);
        btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@@ -5009,6 +5099,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        struct btrfs_root *new_root;
        struct btrfs_root_ref *ref;
        struct extent_buffer *leaf;
+       struct btrfs_key key;
        int ret;
        int err = 0;
 
@@ -5019,9 +5110,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        }
 
        err = -ENOENT;
-       ret = btrfs_find_item(root->fs_info->tree_root, path,
-                               BTRFS_I(dir)->root->root_key.objectid,
-                               location->objectid, BTRFS_ROOT_REF_KEY, NULL);
+       key.objectid = BTRFS_I(dir)->root->root_key.objectid;
+       key.type = BTRFS_ROOT_REF_KEY;
+       key.offset = location->objectid;
+
+       ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
+                               0, 0);
        if (ret) {
                if (ret < 0)
                        err = ret;
@@ -5260,7 +5354,10 @@ static struct inode *new_simple_dir(struct super_block *s,
        inode->i_op = &btrfs_dir_ro_inode_operations;
        inode->i_fop = &simple_dir_operations;
        inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+       inode->i_mtime = CURRENT_TIME;
+       inode->i_atime = inode->i_mtime;
+       inode->i_ctime = inode->i_mtime;
+       BTRFS_I(inode)->i_otime = inode->i_mtime;
 
        return inode;
 }
@@ -5828,7 +5925,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
        inode_init_owner(inode, dir, mode);
        inode_set_bytes(inode, 0);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+       inode->i_mtime = CURRENT_TIME;
+       inode->i_atime = inode->i_mtime;
+       inode->i_ctime = inode->i_mtime;
+       BTRFS_I(inode)->i_otime = inode->i_mtime;
+
        inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                  struct btrfs_inode_item);
        memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@@ -7137,11 +7239,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
+       u64 orig_len = len;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
 
        if (create)
-               unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+               unlock_bits |= EXTENT_DIRTY;
        else
                len = min_t(u64, len, root->sectorsize);
 
@@ -7208,7 +7311,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
            ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
             em->block_start != EXTENT_MAP_HOLE)) {
                int type;
-               int ret;
                u64 block_start, orig_start, orig_block_len, ram_bytes;
 
                if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7272,14 +7374,12 @@ unlock:
                if (start + len > i_size_read(inode))
                        i_size_write(inode, start + len);
 
-               spin_lock(&BTRFS_I(inode)->lock);
-               BTRFS_I(inode)->outstanding_extents++;
-               spin_unlock(&BTRFS_I(inode)->lock);
-
-               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                    lockstart + len - 1, EXTENT_DELALLOC, NULL,
-                                    &cached_state, GFP_NOFS);
-               BUG_ON(ret);
+               if (len < orig_len) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
+               btrfs_free_reserved_data_space(inode, len);
        }
 
        /*
@@ -7808,8 +7908,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        }
 
        /* async crcs make it difficult to collect full stripe writes. */
-       if (btrfs_get_alloc_profile(root, 1) &
-           (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
+       if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
                async_submit = 0;
        else
                async_submit = 1;
@@ -8056,8 +8155,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode,
                                                     count - (size_t)ret);
-               else
-                       btrfs_delalloc_release_metadata(inode, 0);
        }
 out:
        if (wakeup)
@@ -8578,6 +8675,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 
        ei->delayed_node = NULL;
 
+       ei->i_otime.tv_sec = 0;
+       ei->i_otime.tv_nsec = 0;
+
        inode = &ei->vfs_inode;
        extent_map_tree_init(&ei->extent_tree);
        extent_io_tree_init(&ei->io_tree, &inode->i_data);