Btrfs: improve performance on fsync against new inode after rename/unlink
[cascardo/linux.git] / fs / btrfs / inode.c
index d2be95c..453b9d0 100644 (file)
@@ -3428,10 +3428,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
                ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ESTALE)
+               if (ret && ret != -ENOENT)
                        goto out;
 
-               if (ret == -ESTALE && root == root->fs_info->tree_root) {
+               if (ret == -ENOENT && root == root->fs_info->tree_root) {
                        struct btrfs_root *dead_root;
                        struct btrfs_fs_info *fs_info = root->fs_info;
                        int is_dead_root = 0;
@@ -3467,7 +3467,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * Inode is already gone but the orphan item is still there,
                 * kill the orphan item.
                 */
-               if (ret == -ESTALE) {
+               if (ret == -ENOENT) {
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
@@ -3626,7 +3626,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -3645,14 +3645,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
                filled = true;
 
        path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               ret = -ENOMEM;
                goto make_bad;
+       }
 
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
-       if (ret)
+       if (ret) {
+               if (ret > 0)
+                       ret = -ENOENT;
                goto make_bad;
+       }
 
        leaf = path->nodes[0];
 
@@ -3805,11 +3810,12 @@ cache_acl:
        }
 
        btrfs_update_iflags(inode);
-       return;
+       return 0;
 
 make_bad:
        btrfs_free_path(path);
        make_bad_inode(inode);
+       return ret;
 }
 
 /*
@@ -4197,6 +4203,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        int err = 0;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
@@ -4219,11 +4226,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
        /* now the directory is empty */
        err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                 dentry->d_name.name, dentry->d_name.len);
-       if (!err)
+       if (!err) {
                btrfs_i_size_write(inode, 0);
+               /*
+                * Propagate the last_unlink_trans value of the deleted dir to
+                * its parent directory. This is to prevent an unrecoverable
+                * log tree in the case we do something like this:
+                * 1) create dir foo
+                * 2) create snapshot under dir foo
+                * 3) delete the snapshot
+                * 4) rmdir foo
+                * 5) mkdir foo
+                * 6) fsync foo or some file inside foo
+                */
+               if (last_unlink_trans >= trans->transid)
+                       BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+       }
 out:
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
@@ -4558,6 +4581,7 @@ delete:
                        BUG_ON(ret);
                        if (btrfs_should_throttle_delayed_refs(trans, root))
                                btrfs_async_run_delayed_refs(root,
+                                                            trans->transid,
                                        trans->delayed_ref_updates * 2, 0);
                        if (be_nice) {
                                if (truncate_space_check(trans, root,
@@ -5263,7 +5287,7 @@ void btrfs_evict_inode(struct inode *inode)
                if (steal_from_global) {
                        if (!btrfs_check_space_for_delayed_refs(trans, root))
                                ret = btrfs_block_rsv_migrate(global_rsv, rsv,
-                                                             min_size);
+                                                             min_size, 0);
                        else
                                ret = -ENOSPC;
                }
@@ -5594,7 +5618,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                return ERR_PTR(-ENOMEM);
 
        if (inode->i_state & I_NEW) {
-               btrfs_read_locked_inode(inode);
+               int ret;
+
+               ret = btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
                        unlock_new_inode(inode);
@@ -5603,7 +5629,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                } else {
                        unlock_new_inode(inode);
                        iput(inode);
-                       inode = ERR_PTR(-ESTALE);
+                       ASSERT(ret < 0);
+                       inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
                }
        }
 
@@ -5757,6 +5784,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        int name_len;
        int is_curr = 0;        /* ctx->pos points to the current index? */
        bool emitted;
+       bool put = false;
 
        /* FIXME, use a real flag for deciding about the key type */
        if (root->fs_info->tree_root == root)
@@ -5774,7 +5802,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        if (key_type == BTRFS_DIR_INDEX_KEY) {
                INIT_LIST_HEAD(&ins_list);
                INIT_LIST_HEAD(&del_list);
-               btrfs_get_delayed_items(inode, &ins_list, &del_list);
+               put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+                                                     &del_list);
        }
 
        key.type = key_type;
@@ -5921,8 +5950,8 @@ next:
 nopos:
        ret = 0;
 err:
-       if (key_type == BTRFS_DIR_INDEX_KEY)
-               btrfs_put_delayed_items(&ins_list, &del_list);
+       if (put)
+               btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
        btrfs_free_path(path);
        return ret;
 }
@@ -9113,7 +9142,7 @@ static int btrfs_truncate(struct inode *inode)
 
        /* Migrate the slack space for the truncate to our reserve */
        ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
-                                     min_size);
+                                     min_size, 0);
        BUG_ON(ret);
 
        /*
@@ -9153,7 +9182,7 @@ static int btrfs_truncate(struct inode *inode)
                }
 
                ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
-                                             rsv, min_size);
+                                             rsv, min_size, 0);
                BUG_ON(ret);    /* shouldn't happen */
                trans->block_rsv = rsv;
        }
@@ -9174,7 +9203,6 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(root);
        }
-
 out:
        btrfs_free_block_rsv(root, rsv);
 
@@ -10534,7 +10562,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
 static const struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-       .iterate        = btrfs_real_readdir,
+       .iterate_shared = btrfs_real_readdir,
        .unlocked_ioctl = btrfs_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,