Btrfs: fix truncation of compressed and inlined extents
[cascardo/linux.git] / fs / btrfs / inode.c
index a0fa725..cbb4286 100644 (file)
@@ -1096,6 +1096,9 @@ static noinline void async_cow_submit(struct btrfs_work *work)
        nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                PAGE_CACHE_SHIFT;
 
+       /*
+        * atomic_sub_return implies a barrier for waitqueue_active
+        */
        if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
            5 * 1024 * 1024 &&
            waitqueue_active(&root->fs_info->async_submit_wait))
@@ -3018,8 +3021,6 @@ static int __readpage_endio_check(struct inode *inode,
        char *kaddr;
        u32 csum_expected;
        u32 csum = ~(u32)0;
-       static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
-                                     DEFAULT_RATELIMIT_BURST);
 
        csum_expected = *(((u32 *)io_bio->csum) + icsum);
 
@@ -3032,9 +3033,8 @@ static int __readpage_endio_check(struct inode *inode,
        kunmap_atomic(kaddr);
        return 0;
 zeroit:
-       if (__ratelimit(&_rs))
-               btrfs_warn(BTRFS_I(inode)->root->fs_info,
-                          "csum failed ino %llu off %llu csum %u expected csum %u",
+       btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
+               "csum failed ino %llu off %llu csum %u expected csum %u",
                           btrfs_ino(inode), start, csum, csum_expected);
        memset(kaddr + pgoff, 1, len);
        flush_dcache_page(page);
@@ -4217,6 +4217,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
 
 }
 
+static int truncate_inline_extent(struct inode *inode,
+                                 struct btrfs_path *path,
+                                 struct btrfs_key *found_key,
+                                 const u64 item_end,
+                                 const u64 new_size)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       int slot = path->slots[0];
+       struct btrfs_file_extent_item *fi;
+       u32 size = (u32)(new_size - found_key->offset);
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+       if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+               loff_t offset = new_size;
+               loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+               /*
+                * Zero out the remainder of the last page of our inline extent,
+                * instead of directly truncating our inline extent here - that
+                * would be much more complex (decompressing all the data, then
+                * compressing the truncated data, which might be bigger than
+                * the size of the inline extent, resizing the extent, etc).
+                * We release the path because to get the page we might need to
+                * read the extent item from disk (data not in the page cache).
+                */
+               btrfs_release_path(path);
+               return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+       }
+
+       btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+       size = btrfs_file_extent_calc_inline_size(size);
+       btrfs_truncate_item(root, path, size, 1);
+
+       if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+               inode_sub_bytes(inode, item_end + 1 - new_size);
+
+       return 0;
+}
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -4411,27 +4452,40 @@ search_again:
                         * special encodings
                         */
                        if (!del_item &&
-                           btrfs_file_extent_compression(leaf, fi) == 0 &&
                            btrfs_file_extent_encryption(leaf, fi) == 0 &&
                            btrfs_file_extent_other_encoding(leaf, fi) == 0) {
-                               u32 size = new_size - found_key.offset;
-
-                               if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-                                       inode_sub_bytes(inode, item_end + 1 -
-                                                       new_size);
 
                                /*
-                                * update the ram bytes to properly reflect
-                                * the new size of our item
+                                * Need to release path in order to truncate a
+                                * compressed extent. So delete any accumulated
+                                * extent items so far.
                                 */
-                               btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-                               size =
-                                   btrfs_file_extent_calc_inline_size(size);
-                               btrfs_truncate_item(root, path, size, 1);
+                               if (btrfs_file_extent_compression(leaf, fi) !=
+                                   BTRFS_COMPRESS_NONE && pending_del_nr) {
+                                       err = btrfs_del_items(trans, root, path,
+                                                             pending_del_slot,
+                                                             pending_del_nr);
+                                       if (err) {
+                                               btrfs_abort_transaction(trans,
+                                                                       root,
+                                                                       err);
+                                               goto error;
+                                       }
+                                       pending_del_nr = 0;
+                               }
+
+                               err = truncate_inline_extent(inode, path,
+                                                            &found_key,
+                                                            item_end,
+                                                            new_size);
+                               if (err) {
+                                       btrfs_abort_transaction(trans,
+                                                               root, err);
+                                       goto error;
+                               }
                        } else if (test_bit(BTRFS_ROOT_REF_COWS,
                                            &root->state)) {
-                               inode_sub_bytes(inode, item_end + 1 -
-                                               found_key.offset);
+                               inode_sub_bytes(inode, item_end + 1 - new_size);
                        }
                }
 delete:
@@ -5084,7 +5138,8 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
        /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
-       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+       if (!special_file(inode->i_mode))
+               btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
        btrfs_free_io_failure_record(inode, 0, (u64)-1);
 
@@ -7408,6 +7463,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        return em;
 }
 
+struct btrfs_dio_data {
+       u64 outstanding_extents; /* extents pre-counted for this DIO write */
+       u64 reserve; /* reserved bytes not yet consumed by get_blocks */
+};
 
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
@@ -7415,10 +7474,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_state *cached_state = NULL;
+       struct btrfs_dio_data *dio_data = NULL;
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-       u64 *outstanding_extents = NULL;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
 
@@ -7436,7 +7495,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 * that anything that needs to check if there's a transction doesn't get
                 * confused.
                 */
-               outstanding_extents = current->journal_info;
+               dio_data = current->journal_info;
                current->journal_info = NULL;
        }
 
@@ -7568,17 +7627,18 @@ unlock:
                 * within our reservation, otherwise we need to adjust our inode
                 * counter appropriately.
                 */
-               if (*outstanding_extents) {
-                       (*outstanding_extents)--;
+               if (dio_data->outstanding_extents) {
+                       (dio_data->outstanding_extents)--;
                } else {
                        spin_lock(&BTRFS_I(inode)->lock);
                        BTRFS_I(inode)->outstanding_extents++;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
 
-               current->journal_info = outstanding_extents;
                btrfs_free_reserved_data_space(inode, len);
-               set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
+               WARN_ON(dio_data->reserve < len);
+               dio_data->reserve -= len;
+               current->journal_info = dio_data;
        }
 
        /*
@@ -7601,8 +7661,8 @@ unlock:
 unlock_err:
        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
-       if (outstanding_extents)
-               current->journal_info = outstanding_extents;
+       if (dio_data)
+               current->journal_info = dio_data;
        return ret;
 }
 
@@ -8329,7 +8389,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
-       u64 outstanding_extents = 0;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_dio_data dio_data = { 0 };
        size_t count = 0;
        int flags = 0;
        bool wakeup = true;
@@ -8367,7 +8428,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                ret = btrfs_delalloc_reserve_space(inode, count);
                if (ret)
                        goto out;
-               outstanding_extents = div64_u64(count +
+               dio_data.outstanding_extents = div64_u64(count +
                                                BTRFS_MAX_EXTENT_SIZE - 1,
                                                BTRFS_MAX_EXTENT_SIZE);
 
@@ -8376,7 +8437,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * do the accounting properly if we go over the number we
                 * originally calculated.  Abuse current->journal_info for this.
                 */
-               current->journal_info = &outstanding_extents;
+               dio_data.reserve = round_up(count, root->sectorsize);
+               current->journal_info = &dio_data;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
                inode_dio_end(inode);
@@ -8391,16 +8453,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        if (iov_iter_rw(iter) == WRITE) {
                current->journal_info = NULL;
                if (ret < 0 && ret != -EIOCBQUEUED) {
-                       /*
-                        * If the error comes from submitting stage,
-                        * btrfs_get_blocsk_direct() has free'd data space,
-                        * and metadata space will be handled by
-                        * finish_ordered_fn, don't do that again to make
-                        * sure bytes_may_use is correct.
-                        */
-                       if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
-                                    &BTRFS_I(inode)->runtime_flags))
-                               btrfs_delalloc_release_space(inode, count);
+                       if (dio_data.reserve)
+                               btrfs_delalloc_release_space(inode,
+                                                       dio_data.reserve);
                } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode,
                                                     count - (size_t)ret);