Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Aug 2016 03:22:01 +0000 (20:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Aug 2016 03:22:01 +0000 (20:22 -0700)
Pull btrfs fixes from Chris Mason:
 "We've queued up a few different fixes in here.  These range from
  enospc corners to fsync and quota fixes, and a few targeted at error
  handling for corrupt metadata/fuzzing"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix lockdep warning on deadlock against an inode's log mutex
  Btrfs: detect corruption when non-root leaf has zero item
  Btrfs: check btree node's nritems
  btrfs: don't create or leak aliased root while cleaning up orphans
  Btrfs: fix em leak in find_first_block_group
  btrfs: do not background blkdev_put()
  Btrfs: clarify do_chunk_alloc()'s return value
  btrfs: fix fsfreeze hang caused by delayed iputs deal
  btrfs: update btrfs_space_info's bytes_may_use timely
  btrfs: divide btrfs_update_reserved_bytes() into two functions
  btrfs: use correct offset for reloc_inode in prealloc_file_extent_cluster()
  btrfs: qgroup: Fix qgroup incorrectness caused by log replay
  btrfs: relocation: Fix leaking qgroups numbers on data extents
  btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent()
  btrfs: waiting on qgroup rescan should not always be interruptible
  btrfs: properly track when rescan worker is running
  btrfs: flush_space: treat return value of do_chunk_alloc properly
  Btrfs: add ASSERT for block group's memory leak
  btrfs: backref: Fix soft lockup in __merge_refs function
  Btrfs: fix memory leak of reloc_root

1  2 
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/ctree.h
@@@ -1028,6 -1028,7 +1028,7 @@@ struct btrfs_fs_info 
        struct btrfs_workqueue *qgroup_rescan_workers;
        struct completion qgroup_rescan_completion;
        struct btrfs_work qgroup_rescan_work;
+       bool qgroup_rescan_running;     /* protected by qgroup_rescan_lock */
  
        /* filesystem state */
        unsigned long fs_state;
        struct list_head pinned_chunks;
  
        int creating_free_space_tree;
+       /* Used to record internally whether fs has been frozen */
+       int fs_frozen;
  };
  
  struct btrfs_subvolume_writers {
@@@ -2578,7 -2581,7 +2581,7 @@@ int btrfs_alloc_logged_file_extent(stru
                                   struct btrfs_root *root,
                                   u64 root_objectid, u64 owner, u64 offset,
                                   struct btrfs_key *ins);
- int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
                         u64 min_alloc_size, u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc);
  int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@@ -3119,7 -3122,7 +3122,7 @@@ int btrfs_create_subvol_root(struct btr
                             struct btrfs_root *new_root,
                             struct btrfs_root *parent_root,
                             u64 new_dirid);
 -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
 +int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio,
                         unsigned long bio_flags);
  int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --combined fs/btrfs/disk-io.c
@@@ -124,6 -124,7 +124,6 @@@ struct async_submit_bio 
        struct list_head list;
        extent_submit_bio_hook_t *submit_bio_start;
        extent_submit_bio_hook_t *submit_bio_done;
 -      int rw;
        int mirror_num;
        unsigned long bio_flags;
        /*
@@@ -559,8 -560,29 +559,29 @@@ static noinline int check_leaf(struct b
        u32 nritems = btrfs_header_nritems(leaf);
        int slot;
  
-       if (nritems == 0)
+       if (nritems == 0) {
+               struct btrfs_root *check_root;
+               key.objectid = btrfs_header_owner(leaf);
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+               check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+               /*
+                * The only reason we also check NULL here is that during
+                * open_ctree() some roots have not yet been set up.
+                */
+               if (!IS_ERR_OR_NULL(check_root)) {
+                       /* if leaf is the root, then it's fine */
+                       if (leaf->start !=
+                           btrfs_root_bytenr(&check_root->root_item)) {
+                               CORRUPT("non-root leaf's nritems is 0",
+                                       leaf, root, 0);
+                               return -EIO;
+                       }
+               }
                return 0;
+       }
  
        /* Check the 0 item */
        if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
        return 0;
  }
  
+ static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+ {
+       unsigned long nr = btrfs_header_nritems(node);
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+                          "corrupt node: block %llu root %llu nritems %lu",
+                          node->start, root->objectid, nr);
+               return -EIO;
+       }
+       return 0;
+ }
  static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
                ret = -EIO;
        }
  
+       if (found_level > 0 && check_node(root, eb))
+               ret = -EIO;
        if (!ret)
                set_extent_buffer_uptodate(eb);
  err:
@@@ -726,7 -764,7 +763,7 @@@ static void end_workqueue_bio(struct bi
        fs_info = end_io_wq->info;
        end_io_wq->error = bio->bi_error;
  
 -      if (bio->bi_rw & REQ_WRITE) {
 +      if (bio_op(bio) == REQ_OP_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
                        wq = fs_info->endio_meta_write_workers;
                        func = btrfs_endio_meta_write_helper;
@@@ -796,7 -834,7 +833,7 @@@ static void run_one_async_start(struct 
        int ret;
  
        async = container_of(work, struct  async_submit_bio, work);
 -      ret = async->submit_bio_start(async->inode, async->rw, async->bio,
 +      ret = async->submit_bio_start(async->inode, async->bio,
                                      async->mirror_num, async->bio_flags,
                                      async->bio_offset);
        if (ret)
@@@ -829,8 -867,9 +866,8 @@@ static void run_one_async_done(struct b
                return;
        }
  
 -      async->submit_bio_done(async->inode, async->rw, async->bio,
 -                             async->mirror_num, async->bio_flags,
 -                             async->bio_offset);
 +      async->submit_bio_done(async->inode, async->bio, async->mirror_num,
 +                             async->bio_flags, async->bio_offset);
  }
  
  static void run_one_async_free(struct btrfs_work *work)
  }
  
  int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 -                      int rw, struct bio *bio, int mirror_num,
 +                      struct bio *bio, int mirror_num,
                        unsigned long bio_flags,
                        u64 bio_offset,
                        extent_submit_bio_hook_t *submit_bio_start,
                return -ENOMEM;
  
        async->inode = inode;
 -      async->rw = rw;
        async->bio = bio;
        async->mirror_num = mirror_num;
        async->submit_bio_start = submit_bio_start;
  
        atomic_inc(&fs_info->nr_async_submits);
  
 -      if (rw & REQ_SYNC)
 +      if (bio->bi_opf & REQ_SYNC)
                btrfs_set_work_high_priority(&async->work);
  
        btrfs_queue_work(fs_info->workers, &async->work);
@@@ -900,8 -940,9 +937,8 @@@ static int btree_csum_one_bio(struct bi
        return ret;
  }
  
 -static int __btree_submit_bio_start(struct inode *inode, int rw,
 -                                  struct bio *bio, int mirror_num,
 -                                  unsigned long bio_flags,
 +static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
 +                                  int mirror_num, unsigned long bio_flags,
                                    u64 bio_offset)
  {
        /*
        return btree_csum_one_bio(bio);
  }
  
 -static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
 +static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
  {
         * when we're called for a write, we're already in the async
         * submission context.  Just jump into btrfs_map_bio
         */
 -      ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
 +      ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 1);
        if (ret) {
                bio->bi_error = ret;
                bio_endio(bio);
@@@ -940,14 -981,14 +977,14 @@@ static int check_async_write(struct ino
        return 1;
  }
  
 -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 +static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
  {
        int async = check_async_write(inode, bio_flags);
        int ret;
  
 -      if (!(rw & REQ_WRITE)) {
 +      if (bio_op(bio) != REQ_OP_WRITE) {
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
                                          bio, BTRFS_WQ_ENDIO_METADATA);
                if (ret)
                        goto out_w_error;
 -              ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
 -                                  mirror_num, 0);
 +              ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
        } else if (!async) {
                ret = btree_csum_one_bio(bio);
                if (ret)
                        goto out_w_error;
 -              ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
 -                                  mirror_num, 0);
 +              ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
        } else {
                /*
                 * kthread helpers are used to submit writes so that
                 * checksumming can happen in parallel across all CPUs
                 */
                ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 -                                        inode, rw, bio, mirror_num, 0,
 +                                        inode, bio, mirror_num, 0,
                                          bio_offset,
                                          __btree_submit_bio_start,
                                          __btree_submit_bio_done);
@@@ -1618,8 -1661,8 +1655,8 @@@ fail
        return ret;
  }
  
- static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-                                              u64 root_id)
+ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id)
  {
        struct btrfs_root *root;
  
@@@ -2298,6 -2341,7 +2335,7 @@@ static void btrfs_init_qgroup(struct bt
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        fs_info->qgroup_ulist = NULL;
+       fs_info->qgroup_rescan_running = false;
        mutex_init(&fs_info->qgroup_rescan_lock);
  }
  
@@@ -2624,6 -2668,7 +2662,7 @@@ int open_ctree(struct super_block *sb
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
+       fs_info->fs_frozen = 0;
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
@@@ -3419,9 -3464,9 +3458,9 @@@ static int write_dev_supers(struct btrf
                 * to go down lazy.
                 */
                if (i == 0)
 -                      ret = btrfsic_submit_bh(WRITE_FUA, bh);
 +                      ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh);
                else
 -                      ret = btrfsic_submit_bh(WRITE_SYNC, bh);
 +                      ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
                if (ret)
                        errors++;
        }
@@@ -3485,13 -3530,12 +3524,13 @@@ static int write_dev_flush(struct btrfs
  
        bio->bi_end_io = btrfs_end_empty_barrier;
        bio->bi_bdev = device->bdev;
 +      bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
        device->flush_bio = bio;
  
        bio_get(bio);
 -      btrfsic_submit_bio(WRITE_FLUSH, bio);
 +      btrfsic_submit_bio(bio);
  
        return 0;
  }
@@@ -3739,8 -3783,15 +3778,15 @@@ void btrfs_drop_and_free_fs_root(struc
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
  
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
+               if (root->reloc_root) {
+                       free_extent_buffer(root->reloc_root->node);
+                       free_extent_buffer(root->reloc_root->commit_root);
+                       btrfs_put_fs_root(root->reloc_root);
+                       root->reloc_root = NULL;
+               }
+       }
  
        if (root->free_ino_pinned)
                __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@@ -3851,7 -3902,7 +3897,7 @@@ void close_ctree(struct btrfs_root *roo
        smp_mb();
  
        /* wait for the qgroup rescan worker to stop */
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
  
        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
diff --combined fs/btrfs/disk-io.h
@@@ -68,6 -68,8 +68,8 @@@ struct extent_buffer *btrfs_find_tree_b
  struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
                                      struct btrfs_key *location);
  int btrfs_init_fs_root(struct btrfs_root *root);
+ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id);
  int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
                         struct btrfs_root *root);
  void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
@@@ -123,7 -125,7 +125,7 @@@ void btrfs_csum_final(u32 crc, char *re
  int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        enum btrfs_wq_endio_type metadata);
  int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 -                      int rw, struct bio *bio, int mirror_num,
 +                      struct bio *bio, int mirror_num,
                        unsigned long bio_flags, u64 bio_offset,
                        extent_submit_bio_hook_t *submit_bio_start,
                        extent_submit_bio_hook_t *submit_bio_done);
diff --combined fs/btrfs/extent-tree.c
@@@ -60,21 -60,6 +60,6 @@@ enum 
        CHUNK_ALLOC_FORCE = 2,
  };
  
- /*
-  * Control how reservations are dealt with.
-  *
-  * RESERVE_FREE - freeing a reservation.
-  * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
-  *   ENOSPC accounting
-  * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
-  *   bytes_may_use as the ENOSPC accounting is done elsewhere
-  */
- enum {
-       RESERVE_FREE = 0,
-       RESERVE_ALLOC = 1,
-       RESERVE_ALLOC_NO_ACCOUNT = 2,
- };
  static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root, u64 bytenr,
                              u64 num_bytes, int alloc);
@@@ -104,9 -89,10 +89,10 @@@ static int find_next_key(struct btrfs_p
                         struct btrfs_key *key);
  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups);
- static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve,
-                                      int delalloc);
+ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc);
+ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc);
  static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
  int btrfs_pin_extent(struct btrfs_root *root,
@@@ -2058,7 -2044,7 +2044,7 @@@ int btrfs_discard_extent(struct btrfs_r
         */
        btrfs_bio_counter_inc_blocked(root->fs_info);
        /* Tell the block device(s) that the sectors can be discarded */
 -      ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
 +      ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD,
                              bytenr, &num_bytes, &bbio, 0);
        /* Error condition is -ENOMEM */
        if (!ret) {
@@@ -3501,7 -3487,6 +3487,6 @@@ again
                dcs = BTRFS_DC_SETUP;
        else if (ret == -ENOSPC)
                set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
-       btrfs_free_reserved_data_space(inode, 0, num_pages);
  
  out_put:
        iput(inode);
@@@ -4472,6 -4457,15 +4457,15 @@@ void check_system_chunk(struct btrfs_tr
        }
  }
  
+ /*
+  * If force is CHUNK_ALLOC_FORCE:
+  *    - return 1 if it successfully allocates a chunk,
+  *    - return errors including -ENOSPC otherwise.
+  * If force is NOT CHUNK_ALLOC_FORCE:
+  *    - return 0 if it doesn't need to allocate a new chunk,
+  *    - return 1 if it successfully allocates a chunk,
+  *    - return errors including -ENOSPC otherwise.
+  */
  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 flags, int force)
  {
@@@ -4882,7 -4876,7 +4876,7 @@@ static int flush_space(struct btrfs_roo
                                     btrfs_get_alloc_profile(root, 0),
                                     CHUNK_ALLOC_NO_FORCE);
                btrfs_end_transaction(trans, root);
-               if (ret == -ENOSPC)
+               if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
                break;
        case COMMIT_TRANS:
@@@ -6497,19 -6491,15 +6491,15 @@@ void btrfs_wait_block_group_reservation
  }
  
  /**
-  * btrfs_update_reserved_bytes - update the block_group and space info counters
+  * btrfs_add_reserved_bytes - update the block_group and space info counters
   * @cache:    The cache we are manipulating
+  * @ram_bytes:  The number of bytes of file content; it will be the same as
+  *              @num_bytes except for the compress path.
   * @num_bytes:        The number of bytes in question
-  * @reserve:  One of the reservation enums
   * @delalloc:   The blocks are allocated for the delalloc write
   *
-  * This is called by the allocator when it reserves space, or by somebody who is
-  * freeing space that was never actually used on disk.  For example if you
-  * reserve some space for a new leaf in transaction A and before transaction A
-  * commits you free that leaf, you call this with reserve set to 0 in order to
-  * clear the reservation.
-  *
-  * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+  * This is called by the allocator when it reserves space. Metadata
+  * reservations should be called with RESERVE_ALLOC so we do the proper
   * ENOSPC accounting.  For data we handle the reservation through clearing the
   * delalloc bits in the io_tree.  We have to do this since we could end up
   * allocating less disk space for the amount of data we have reserved in the
   * make the reservation and return -EAGAIN, otherwise this function always
   * succeeds.
   */
- static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve, int delalloc)
+ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc)
  {
        struct btrfs_space_info *space_info = cache->space_info;
        int ret = 0;
  
        spin_lock(&space_info->lock);
        spin_lock(&cache->lock);
-       if (reserve != RESERVE_FREE) {
-               if (cache->ro) {
-                       ret = -EAGAIN;
-               } else {
-                       cache->reserved += num_bytes;
-                       space_info->bytes_reserved += num_bytes;
-                       if (reserve == RESERVE_ALLOC) {
-                               trace_btrfs_space_reservation(cache->fs_info,
-                                               "space_info", space_info->flags,
-                                               num_bytes, 0);
-                               space_info->bytes_may_use -= num_bytes;
-                       }
-                       if (delalloc)
-                               cache->delalloc_bytes += num_bytes;
-               }
+       if (cache->ro) {
+               ret = -EAGAIN;
        } else {
-               if (cache->ro)
-                       space_info->bytes_readonly += num_bytes;
-               cache->reserved -= num_bytes;
-               space_info->bytes_reserved -= num_bytes;
+               cache->reserved += num_bytes;
+               space_info->bytes_reserved += num_bytes;
  
+               trace_btrfs_space_reservation(cache->fs_info,
+                               "space_info", space_info->flags,
+                               ram_bytes, 0);
+               space_info->bytes_may_use -= ram_bytes;
                if (delalloc)
-                       cache->delalloc_bytes -= num_bytes;
+                       cache->delalloc_bytes += num_bytes;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&space_info->lock);
        return ret;
  }
  
+ /**
+  * btrfs_free_reserved_bytes - update the block_group and space info counters
+  * @cache:      The cache we are manipulating
+  * @num_bytes:  The number of bytes in question
+  * @delalloc:   The blocks are allocated for the delalloc write
+  *
+  * This is called by somebody who is freeing space that was never actually used
+  * on disk.  For example if you reserve some space for a new leaf in transaction
+  * A and before transaction A commits you free that leaf, you call this to
+  * clear the reservation.
+  */
+ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc)
+ {
+       struct btrfs_space_info *space_info = cache->space_info;
+       int ret = 0;
+       spin_lock(&space_info->lock);
+       spin_lock(&cache->lock);
+       if (cache->ro)
+               space_info->bytes_readonly += num_bytes;
+       cache->reserved -= num_bytes;
+       space_info->bytes_reserved -= num_bytes;
+       if (delalloc)
+               cache->delalloc_bytes -= num_bytes;
+       spin_unlock(&cache->lock);
+       spin_unlock(&space_info->lock);
+       return ret;
+ }
  void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
  {
@@@ -7191,7 -7200,7 +7200,7 @@@ void btrfs_free_tree_block(struct btrfs
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
  
                btrfs_add_free_space(cache, buf->start, buf->len);
-               btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+               btrfs_free_reserved_bytes(cache, buf->len, 0);
                btrfs_put_block_group(cache);
                trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
                pin = 0;
@@@ -7416,9 -7425,9 +7425,9 @@@ btrfs_release_block_group(struct btrfs_
   * the free space extent currently.
   */
  static noinline int find_free_extent(struct btrfs_root *orig_root,
-                                    u64 num_bytes, u64 empty_size,
-                                    u64 hint_byte, struct btrfs_key *ins,
-                                    u64 flags, int delalloc)
+                               u64 ram_bytes, u64 num_bytes, u64 empty_size,
+                               u64 hint_byte, struct btrfs_key *ins,
+                               u64 flags, int delalloc)
  {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
        struct btrfs_space_info *space_info;
        int loop = 0;
        int index = __get_raid_index(flags);
-       int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
-               RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
        bool use_cluster = true;
@@@ -7763,8 -7770,8 +7770,8 @@@ checks
                                             search_start - offset);
                BUG_ON(offset > search_start);
  
-               ret = btrfs_update_reserved_bytes(block_group, num_bytes,
-                                                 alloc_type, delalloc);
+               ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+                               num_bytes, delalloc);
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
@@@ -7936,7 -7943,7 +7943,7 @@@ again
        up_read(&info->groups_sem);
  }
  
- int btrfs_reserve_extent(struct btrfs_root *root,
+ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc)
        flags = btrfs_get_alloc_profile(root, is_data);
  again:
        WARN_ON(num_bytes < root->sectorsize);
-       ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
-                              flags, delalloc);
+       ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+                              hint_byte, ins, flags, delalloc);
        if (!ret && !is_data) {
                btrfs_dec_block_group_reservations(root->fs_info,
                                                   ins->objectid);
                        num_bytes = min(num_bytes >> 1, ins->offset);
                        num_bytes = round_down(num_bytes, root->sectorsize);
                        num_bytes = max(num_bytes, min_alloc_size);
+                       ram_bytes = num_bytes;
                        if (num_bytes == min_alloc_size)
                                final_tried = true;
                        goto again;
@@@ -7995,7 -8003,7 +8003,7 @@@ static int __btrfs_free_reserved_extent
                if (btrfs_test_opt(root->fs_info, DISCARD))
                        ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
-               btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+               btrfs_free_reserved_bytes(cache, len, delalloc);
                trace_btrfs_reserved_extent_free(root, start, len);
        }
  
@@@ -8223,8 -8231,8 +8231,8 @@@ int btrfs_alloc_logged_file_extent(stru
        if (!block_group)
                return -EINVAL;
  
-       ret = btrfs_update_reserved_bytes(block_group, ins->offset,
-                                         RESERVE_ALLOC_NO_ACCOUNT, 0);
+       ret = btrfs_add_reserved_bytes(block_group, ins->offset,
+                                      ins->offset, 0);
        BUG_ON(ret); /* logic error */
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
@@@ -8368,7 -8376,7 +8376,7 @@@ struct extent_buffer *btrfs_alloc_tree_
        if (IS_ERR(block_rsv))
                return ERR_CAST(block_rsv);
  
-       ret = btrfs_reserve_extent(root, blocksize, blocksize,
+       ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
                                   empty_size, hint, &ins, 0, 0);
        if (ret)
                goto out_unuse;
@@@ -8521,35 -8529,6 +8529,6 @@@ reada
        wc->reada_slot = slot;
  }
  
- /*
-  * These may not be seen by the usual inc/dec ref code so we have to
-  * add them here.
-  */
- static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root, u64 bytenr,
-                                    u64 num_bytes)
- {
-       struct btrfs_qgroup_extent_record *qrecord;
-       struct btrfs_delayed_ref_root *delayed_refs;
-       qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
-       if (!qrecord)
-               return -ENOMEM;
-       qrecord->bytenr = bytenr;
-       qrecord->num_bytes = num_bytes;
-       qrecord->old_roots = NULL;
-       delayed_refs = &trans->transaction->delayed_refs;
-       spin_lock(&delayed_refs->lock);
-       if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
-                                            delayed_refs, qrecord))
-               kfree(qrecord);
-       spin_unlock(&delayed_refs->lock);
-       return 0;
- }
  static int account_leaf_items(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *eb)
  
                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
  
-               ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+               ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+                               bytenr, num_bytes, GFP_NOFS);
                if (ret)
                        return ret;
        }
@@@ -8732,8 -8712,9 +8712,9 @@@ walk_down
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
  
-                       ret = record_one_subtree_extent(trans, root, child_bytenr,
-                                                       root->nodesize);
+                       ret = btrfs_qgroup_insert_dirty_extent(trans,
+                                       root->fs_info, child_bytenr,
+                                       root->nodesize, GFP_NOFS);
                        if (ret)
                                goto out;
                }
@@@ -9906,6 -9887,7 +9887,7 @@@ static int find_first_block_group(struc
                        } else {
                                ret = 0;
                        }
+                       free_extent_map(em);
                        goto out;
                }
                path->slots[0]++;
@@@ -9942,6 -9924,7 +9924,7 @@@ void btrfs_put_block_group_cache(struc
                block_group->iref = 0;
                block_group->inode = NULL;
                spin_unlock(&block_group->lock);
+               ASSERT(block_group->io_ctl.inode == NULL);
                iput(inode);
                last = block_group->key.objectid + block_group->key.offset;
                btrfs_put_block_group(block_group);
@@@ -9999,6 -9982,10 +9982,10 @@@ int btrfs_free_block_groups(struct btrf
                        free_excluded_extents(info->extent_root, block_group);
  
                btrfs_remove_free_space_cache(block_group);
+               ASSERT(list_empty(&block_group->dirty_list));
+               ASSERT(list_empty(&block_group->io_list));
+               ASSERT(list_empty(&block_group->bg_list));
+               ASSERT(atomic_read(&block_group->count) == 1);
                btrfs_put_block_group(block_group);
  
                spin_lock(&info->block_group_cache_lock);
diff --combined fs/btrfs/extent_io.h
@@@ -20,6 -20,7 +20,7 @@@
  #define EXTENT_DAMAGED                (1U << 14)
  #define EXTENT_NORESERVE      (1U << 15)
  #define EXTENT_QGROUP_RESERVED        (1U << 16)
+ #define EXTENT_CLEAR_DATA_RESV        (1U << 17)
  #define EXTENT_IOBITS         (EXTENT_LOCKED | EXTENT_WRITEBACK)
  #define EXTENT_CTLBITS                (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
  
@@@ -63,16 -64,16 +64,16 @@@ struct btrfs_root
  struct btrfs_io_bio;
  struct io_failure_record;
  
 -typedef       int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
 -                                     struct bio *bio, int mirror_num,
 -                                     unsigned long bio_flags, u64 bio_offset);
 +typedef       int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
 +                                     int mirror_num, unsigned long bio_flags,
 +                                     u64 bio_offset);
  struct extent_io_ops {
        int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
                             u64 start, u64 end, int *page_started,
                             unsigned long *nr_written);
        int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
        extent_submit_bio_hook_t *submit_bio_hook;
 -      int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
 +      int (*merge_bio_hook)(struct page *page, unsigned long offset,
                              size_t size, struct bio *bio,
                              unsigned long bio_flags);
        int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
diff --combined fs/btrfs/inode.c
@@@ -566,6 -566,8 +566,8 @@@ cont
                                                     PAGE_SET_WRITEBACK |
                                                     page_error_op |
                                                     PAGE_END_WRITEBACK);
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        goto free_pages_out;
                }
        }
@@@ -742,7 -744,7 +744,7 @@@ retry
                lock_extent(io_tree, async_extent->start,
                            async_extent->start + async_extent->ram_size - 1);
  
-               ret = btrfs_reserve_extent(root,
+               ret = btrfs_reserve_extent(root, async_extent->ram_size,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1, 1);
@@@ -969,7 -971,8 +971,8 @@@ static noinline int cow_file_range(stru
                                     EXTENT_DEFRAG, PAGE_UNLOCK |
                                     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
                                     PAGE_END_WRITEBACK);
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        *nr_written = *nr_written +
                             (end - start + PAGE_SIZE) / PAGE_SIZE;
                        *page_started = 1;
                unsigned long op;
  
                cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                                           &ins, 1, 1);
                if (ret < 0)
@@@ -1489,8 -1492,10 +1492,10 @@@ out_check
                extent_clear_unlock_delalloc(inode, cur_offset,
                                             cur_offset + num_bytes - 1,
                                             locked_page, EXTENT_LOCKED |
-                                            EXTENT_DELALLOC, PAGE_UNLOCK |
-                                            PAGE_SET_PRIVATE2);
+                                            EXTENT_DELALLOC |
+                                            EXTENT_CLEAR_DATA_RESV,
+                                            PAGE_UNLOCK | PAGE_SET_PRIVATE2);
                if (!nolock && nocow)
                        btrfs_end_write_no_snapshoting(root);
                cur_offset = extent_end;
@@@ -1807,7 -1812,9 +1812,9 @@@ static void btrfs_clear_bit_hook(struc
                        return;
  
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-                   && do_list && !(state->state & EXTENT_NORESERVE))
+                   && do_list && !(state->state & EXTENT_NORESERVE)
+                   && (*bits & (EXTENT_DO_ACCOUNTING |
+                   EXTENT_CLEAR_DATA_RESV)))
                        btrfs_free_reserved_data_space_noquota(inode,
                                        state->start, len);
  
   * return 0 if page can be merged to bio
   * return error otherwise
   */
 -int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
 +int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio,
                         unsigned long bio_flags)
  {
  
        length = bio->bi_iter.bi_size;
        map_length = length;
 -      ret = btrfs_map_block(root->fs_info, rw, logical,
 +      ret = btrfs_map_block(root->fs_info, bio_op(bio), logical,
                              &map_length, NULL, 0);
        if (ret < 0)
                return ret;
   * At IO completion time the cums attached on the ordered extent record
   * are inserted into the btree
   */
 -static int __btrfs_submit_bio_start(struct inode *inode, int rw,
 -                                  struct bio *bio, int mirror_num,
 -                                  unsigned long bio_flags,
 +static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
 +                                  int mirror_num, unsigned long bio_flags,
                                    u64 bio_offset)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
   * At IO completion time the cums attached on the ordered extent record
   * are inserted into the btree
   */
 -static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
 +static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
                          int mirror_num, unsigned long bio_flags,
                          u64 bio_offset)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
  
 -      ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
 +      ret = btrfs_map_bio(root, bio, mirror_num, 1);
        if (ret) {
                bio->bi_error = ret;
                bio_endio(bio);
   * extent_io.c submission hook. This does the right thing for csum calculation
   * on write, or reading the csums from the tree before a read
   */
 -static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 +static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
                          int mirror_num, unsigned long bio_flags,
                          u64 bio_offset)
  {
        if (btrfs_is_free_space_inode(inode))
                metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
  
 -      if (!(rw & REQ_WRITE)) {
 +      if (bio_op(bio) != REQ_OP_WRITE) {
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
                if (ret)
                        goto out;
                        goto mapit;
                /* we're doing a write, do the async checksumming */
                ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 -                                 inode, rw, bio, mirror_num,
 +                                 inode, bio, mirror_num,
                                   bio_flags, bio_offset,
                                   __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
        }
  
  mapit:
 -      ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
 +      ret = btrfs_map_bio(root, bio, mirror_num, 0);
  
  out:
        if (ret < 0) {
@@@ -7251,7 -7259,7 +7258,7 @@@ static struct extent_map *btrfs_new_ext
        int ret;
  
        alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
                                   alloc_hint, &ins, 1, 1);
        if (ret)
                return ERR_PTR(ret);
@@@ -7751,6 -7759,13 +7758,13 @@@ static int btrfs_get_blocks_direct(stru
                                ret = PTR_ERR(em2);
                                goto unlock_err;
                        }
+                       /*
+                        * For inode marked NODATACOW or extent marked PREALLOC,
+                        * use the existing or preallocated extent, so does not
+                        * need to adjust btrfs_space_info's bytes_may_use.
+                        */
+                       btrfs_free_reserved_data_space_noquota(inode,
+                                       start, len);
                        goto unlock;
                }
        }
@@@ -7785,7 -7800,6 +7799,6 @@@ unlock
                        i_size_write(inode, start + len);
  
                adjust_dio_outstanding_extents(inode, dio_data, len);
-               btrfs_free_reserved_data_space(inode, start, len);
                WARN_ON(dio_data->reserve < len);
                dio_data->reserve -= len;
                dio_data->unsubmitted_oe_range_end = start + len;
  }
  
  static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 -                                      int rw, int mirror_num)
 +                                      int mirror_num)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
  
 -      BUG_ON(rw & REQ_WRITE);
 +      BUG_ON(bio_op(bio) == REQ_OP_WRITE);
  
        bio_get(bio);
  
        if (ret)
                goto err;
  
 -      ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
 +      ret = btrfs_map_bio(root, bio, mirror_num, 0);
  err:
        bio_put(bio);
        return ret;
@@@ -7892,7 -7906,7 +7905,7 @@@ static int dio_read_error(struct inode 
        int read_mode;
        int ret;
  
 -      BUG_ON(failed_bio->bi_rw & REQ_WRITE);
 +      BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
  
        ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
                free_io_failure(inode, failrec);
                return -EIO;
        }
 +      bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
  
        btrfs_debug(BTRFS_I(inode)->root->fs_info,
                    "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
                    read_mode, failrec->this_mirror, failrec->in_validation);
  
 -      ret = submit_dio_repair_bio(inode, bio, read_mode,
 -                                  failrec->this_mirror);
 +      ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
        if (ret) {
                free_io_failure(inode, failrec);
                bio_put(bio);
@@@ -8216,7 -8230,7 +8229,7 @@@ static void btrfs_endio_direct_write(st
        bio_put(bio);
  }
  
 -static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
 +static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
                                    struct bio *bio, int mirror_num,
                                    unsigned long bio_flags, u64 offset)
  {
@@@ -8234,8 -8248,8 +8247,8 @@@ static void btrfs_end_dio_bio(struct bi
  
        if (err)
                btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
 -                         "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
 -                         btrfs_ino(dip->inode), bio->bi_rw,
 +                         "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
 +                         btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf,
                           (unsigned long long)bio->bi_iter.bi_sector,
                           bio->bi_iter.bi_size, err);
  
@@@ -8309,11 -8323,11 +8322,11 @@@ static inline int btrfs_lookup_and_bind
  }
  
  static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 -                                       int rw, u64 file_offset, int skip_sum,
 +                                       u64 file_offset, int skip_sum,
                                         int async_submit)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
 -      int write = rw & REQ_WRITE;
 +      bool write = bio_op(bio) == REQ_OP_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
  
  
        if (write && async_submit) {
                ret = btrfs_wq_submit_bio(root->fs_info,
 -                                 inode, rw, bio, 0, 0,
 -                                 file_offset,
 +                                 inode, bio, 0, 0, file_offset,
                                   __btrfs_submit_bio_start_direct_io,
                                   __btrfs_submit_bio_done);
                goto err;
                        goto err;
        }
  map:
 -      ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 +      ret = btrfs_map_bio(root, bio, 0, async_submit);
  err:
        bio_put(bio);
        return ret;
  }
  
 -static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 +static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
                                    int skip_sum)
  {
        struct inode *inode = dip->inode;
        int i;
  
        map_length = orig_bio->bi_iter.bi_size;
 -      ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
 -                            &map_length, NULL, 0);
 +      ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
 +                            start_sector << 9, &map_length, NULL, 0);
        if (ret)
                return -EIO;
  
        if (!bio)
                return -ENOMEM;
  
 +      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
        bio->bi_private = dip;
        bio->bi_end_io = btrfs_end_dio_bio;
        btrfs_io_bio(bio)->logical = file_offset;
@@@ -8419,7 -8433,7 +8432,7 @@@ next_block
                         * before we're done setting it up
                         */
                        atomic_inc(&dip->pending_bios);
 -                      ret = __btrfs_submit_dio_bio(bio, inode, rw,
 +                      ret = __btrfs_submit_dio_bio(bio, inode,
                                                     file_offset, skip_sum,
                                                     async_submit);
                        if (ret) {
                                                  start_sector, GFP_NOFS);
                        if (!bio)
                                goto out_err;
 +                      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
                        bio->bi_private = dip;
                        bio->bi_end_io = btrfs_end_dio_bio;
                        btrfs_io_bio(bio)->logical = file_offset;
  
                        map_length = orig_bio->bi_iter.bi_size;
 -                      ret = btrfs_map_block(root->fs_info, rw,
 +                      ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
                                              start_sector << 9,
                                              &map_length, NULL, 0);
                        if (ret) {
        }
  
  submit:
 -      ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
 +      ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
                                     async_submit);
        if (!ret)
                return 0;
@@@ -8483,14 -8496,14 +8496,14 @@@ out_err
        return 0;
  }
  
 -static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 -                              struct inode *inode, loff_t file_offset)
 +static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
 +                              loff_t file_offset)
  {
        struct btrfs_dio_private *dip = NULL;
        struct bio *io_bio = NULL;
        struct btrfs_io_bio *btrfs_bio;
        int skip_sum;
 -      int write = rw & REQ_WRITE;
 +      bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
        int ret = 0;
  
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
                        dio_data->unsubmitted_oe_range_end;
        }
  
 -      ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
 +      ret = btrfs_submit_direct_hook(dip, skip_sum);
        if (!ret)
                return;
  
@@@ -10306,6 -10319,7 +10319,7 @@@ static int __btrfs_prealloc_file_range(
        u64 last_alloc = (u64)-1;
        int ret = 0;
        bool own_trans = true;
+       u64 end = start + num_bytes - 1;
  
        if (trans)
                own_trans = false;
                 * sized chunks.
                 */
                cur_bytes = min(cur_bytes, last_alloc);
-               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
-                                          *alloc_hint, &ins, 1, 0);
+               ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+                               min_size, 0, *alloc_hint, &ins, 1, 0);
                if (ret) {
                        if (own_trans)
                                btrfs_end_transaction(trans, root);
@@@ -10414,6 -10428,9 +10428,9 @@@ next
                if (own_trans)
                        btrfs_end_transaction(trans, root);
        }
+       if (cur_offset < end)
+               btrfs_free_reserved_data_space(inode, cur_offset,
+                       end - cur_offset + 1);
        return ret;
  }
  
diff --combined fs/btrfs/volumes.c
@@@ -461,7 -461,7 +461,7 @@@ loop_lock
                        sync_pending = 0;
                }
  
 -              btrfsic_submit_bio(cur->bi_rw, cur);
 +              btrfsic_submit_bio(cur);
                num_run++;
                batch_run++;
  
@@@ -834,10 -834,6 +834,6 @@@ static void __free_device(struct work_s
        struct btrfs_device *device;
  
        device = container_of(work, struct btrfs_device, rcu_work);
-       if (device->bdev)
-               blkdev_put(device->bdev, device->mode);
        rcu_string_free(device->name);
        kfree(device);
  }
@@@ -852,6 -848,17 +848,17 @@@ static void free_device(struct rcu_hea
        schedule_work(&device->rcu_work);
  }
  
+ static void btrfs_close_bdev(struct btrfs_device *device)
+ {
+       if (device->bdev && device->writeable) {
+               sync_blockdev(device->bdev);
+               invalidate_bdev(device->bdev);
+       }
+       if (device->bdev)
+               blkdev_put(device->bdev, device->mode);
+ }
  static void btrfs_close_one_device(struct btrfs_device *device)
  {
        struct btrfs_fs_devices *fs_devices = device->fs_devices;
        if (device->missing)
                fs_devices->missing_devices--;
  
-       if (device->bdev && device->writeable) {
-               sync_blockdev(device->bdev);
-               invalidate_bdev(device->bdev);
-       }
+       btrfs_close_bdev(device);
  
        new_device = btrfs_alloc_device(NULL, &device->devid,
                                        device->uuid);
@@@ -1932,6 -1936,8 +1936,8 @@@ int btrfs_rm_device(struct btrfs_root *
                btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        }
  
+       btrfs_close_bdev(device);
        call_rcu(&device->rcu, free_device);
  
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@@ -2025,6 -2031,9 +2031,9 @@@ void btrfs_rm_dev_replace_free_srcdev(s
                /* zero out the old super if it is writable */
                btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
+       btrfs_close_bdev(srcdev);
        call_rcu(&srcdev->rcu, free_device);
  
        /*
@@@ -2080,6 -2089,8 +2089,8 @@@ void btrfs_destroy_dev_replace_tgtdev(s
         * the device_list_mutex lock.
         */
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+       btrfs_close_bdev(tgtdev);
        call_rcu(&tgtdev->rcu, free_device);
  }
  
@@@ -5318,7 -5329,7 +5329,7 @@@ void btrfs_put_bbio(struct btrfs_bio *b
                kfree(bbio);
  }
  
 -static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 +static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
                             u64 logical, u64 *length,
                             struct btrfs_bio **bbio_ret,
                             int mirror_num, int need_raid_map)
                raid56_full_stripe_start *= full_stripe_len;
        }
  
 -      if (rw & REQ_DISCARD) {
 +      if (op == REQ_OP_DISCARD) {
                /* we don't discard raid56 yet */
                if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                        ret = -EOPNOTSUPP;
                   For other RAID types and for RAID[56] reads, just allow a single
                   stripe (on a single disk). */
                if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 -                  (rw & REQ_WRITE)) {
 +                  (op == REQ_OP_WRITE)) {
                        max_len = stripe_len * nr_data_stripes(map) -
                                (offset - raid56_full_stripe_start);
                } else {
                btrfs_dev_replace_set_lock_blocking(dev_replace);
  
        if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
 -          !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
 -          dev_replace->tgtdev != NULL) {
 +          op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
 +          op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
                /*
                 * in dev-replace case, for repair case (that's the only
                 * case where the mirror is selected explicitly when
                            (offset + *length);
  
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 -              if (rw & REQ_DISCARD)
 +              if (op == REQ_OP_DISCARD)
                        num_stripes = min_t(u64, map->num_stripes,
                                            stripe_nr_end - stripe_nr_orig);
                stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
                                &stripe_index);
 -              if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
 +              if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
 +                  op != REQ_GET_READ_MIRRORS)
                        mirror_num = 1;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
 -              if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
 +              if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
 +                  op == REQ_GET_READ_MIRRORS)
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
                }
  
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 -              if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
 +              if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
 +                  op == REQ_GET_READ_MIRRORS) {
                        num_stripes = map->num_stripes;
                } else if (mirror_num) {
                        stripe_index = mirror_num - 1;
                stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
                stripe_index *= map->sub_stripes;
  
 -              if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
 +              if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
                        num_stripes = map->sub_stripes;
 -              else if (rw & REQ_DISCARD)
 +              else if (op == REQ_OP_DISCARD)
                        num_stripes = min_t(u64, map->sub_stripes *
                                            (stripe_nr_end - stripe_nr_orig),
                                            map->num_stripes);
  
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                if (need_raid_map &&
 -                  ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
 +                  (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS ||
                     mirror_num > 1)) {
                        /* push stripe_nr back to the start of the full stripe */
                        stripe_nr = div_u64(raid56_full_stripe_start,
                        /* We distribute the parity blocks across stripes */
                        div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
                                        &stripe_index);
 -                      if (!(rw & (REQ_WRITE | REQ_DISCARD |
 -                                  REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
 +                      if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
 +                          op != REQ_GET_READ_MIRRORS) && mirror_num <= 1)
                                mirror_num = 1;
                }
        } else {
  
        num_alloc_stripes = num_stripes;
        if (dev_replace_is_ongoing) {
 -              if (rw & (REQ_WRITE | REQ_DISCARD))
 +              if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD)
                        num_alloc_stripes <<= 1;
 -              if (rw & REQ_GET_READ_MIRRORS)
 +              if (op == REQ_GET_READ_MIRRORS)
                        num_alloc_stripes++;
                tgtdev_indexes = num_stripes;
        }
  
        /* build raid_map */
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
 -          need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
 +          need_raid_map &&
 +          ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) ||
            mirror_num > 1)) {
                u64 tmp;
                unsigned rot;
                                RAID6_Q_STRIPE;
        }
  
 -      if (rw & REQ_DISCARD) {
 +      if (op == REQ_OP_DISCARD) {
                u32 factor = 0;
                u32 sub_stripes = 0;
                u64 stripes_per_dev = 0;
                }
        }
  
 -      if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
 +      if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
                max_errors = btrfs_chunk_max_errors(map);
  
        if (bbio->raid_map)
                sort_parity_stripes(bbio, num_stripes);
  
        tgtdev_indexes = 0;
 -      if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
 +      if (dev_replace_is_ongoing &&
 +         (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) &&
            dev_replace->tgtdev != NULL) {
                int index_where_to_add;
                u64 srcdev_devid = dev_replace->srcdev->devid;
                        }
                }
                num_stripes = index_where_to_add;
 -      } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
 +      } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) &&
                   dev_replace->tgtdev != NULL) {
                u64 srcdev_devid = dev_replace->srcdev->devid;
                int index_srcdev = 0;
        return ret;
  }
  
 -int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 +int btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
                      u64 logical, u64 *length,
                      struct btrfs_bio **bbio_ret, int mirror_num)
  {
 -      return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
 +      return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
                                 mirror_num, 0);
  }
  
  /* For Scrub/replace */
 -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
 +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op,
                     u64 logical, u64 *length,
                     struct btrfs_bio **bbio_ret, int mirror_num,
                     int need_raid_map)
  {
 -      return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
 +      return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
                                 mirror_num, need_raid_map);
  }
  
@@@ -6006,13 -6012,13 +6017,13 @@@ static void btrfs_end_bio(struct bio *b
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
                        if (dev->bdev) {
 -                              if (bio->bi_rw & WRITE)
 +                              if (bio_op(bio) == REQ_OP_WRITE)
                                        btrfs_dev_stat_inc(dev,
                                                BTRFS_DEV_STAT_WRITE_ERRS);
                                else
                                        btrfs_dev_stat_inc(dev,
                                                BTRFS_DEV_STAT_READ_ERRS);
 -                              if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
 +                              if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH)
                                        btrfs_dev_stat_inc(dev,
                                                BTRFS_DEV_STAT_FLUSH_ERRS);
                                btrfs_dev_stat_print_on_error(dev);
   */
  static noinline void btrfs_schedule_bio(struct btrfs_root *root,
                                        struct btrfs_device *device,
 -                                      int rw, struct bio *bio)
 +                                      struct bio *bio)
  {
        int should_queue = 1;
        struct btrfs_pending_bios *pending_bios;
        }
  
        /* don't bother with additional async steps for reads, right now */
 -      if (!(rw & REQ_WRITE)) {
 +      if (bio_op(bio) == REQ_OP_READ) {
                bio_get(bio);
 -              btrfsic_submit_bio(rw, bio);
 +              btrfsic_submit_bio(bio);
                bio_put(bio);
                return;
        }
        atomic_inc(&root->fs_info->nr_async_bios);
        WARN_ON(bio->bi_next);
        bio->bi_next = NULL;
 -      bio->bi_rw |= rw;
  
        spin_lock(&device->io_lock);
 -      if (bio->bi_rw & REQ_SYNC)
 +      if (bio->bi_opf & REQ_SYNC)
                pending_bios = &device->pending_sync_bios;
        else
                pending_bios = &device->pending_bios;
  
  static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
                              struct bio *bio, u64 physical, int dev_nr,
 -                            int rw, int async)
 +                            int async)
  {
        struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
  
  
                rcu_read_lock();
                name = rcu_dereference(dev->name);
 -              pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
 -                       "(%s id %llu), size=%u\n", rw,
 +              pr_debug("btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu "
 +                       "(%s id %llu), size=%u\n", bio_op(bio), bio->bi_opf,
                         (u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev,
                         name->str, dev->devid, bio->bi_iter.bi_size);
                rcu_read_unlock();
        btrfs_bio_counter_inc_noblocked(root->fs_info);
  
        if (async)
 -              btrfs_schedule_bio(root, dev, rw, bio);
 +              btrfs_schedule_bio(root, dev, bio);
        else
 -              btrfsic_submit_bio(rw, bio);
 +              btrfsic_submit_bio(bio);
  }
  
  static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
        }
  }
  
 -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 +int btrfs_map_bio(struct btrfs_root *root, struct bio *bio,
                  int mirror_num, int async_submit)
  {
        struct btrfs_device *dev;
        map_length = length;
  
        btrfs_bio_counter_inc_blocked(root->fs_info);
 -      ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
 -                            mirror_num, 1);
 +      ret = __btrfs_map_block(root->fs_info, bio_op(bio), logical,
 +                              &map_length, &bbio, mirror_num, 1);
        if (ret) {
                btrfs_bio_counter_dec(root->fs_info);
                return ret;
        atomic_set(&bbio->stripes_pending, bbio->num_stripes);
  
        if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 -          ((rw & WRITE) || (mirror_num > 1))) {
 +          ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
                /* In this case, map_length has been set to the length of
                   a single stripe; not the whole write */
 -              if (rw & WRITE) {
 +              if (bio_op(bio) == REQ_OP_WRITE) {
                        ret = raid56_parity_write(root, bio, bbio, map_length);
                } else {
                        ret = raid56_parity_recover(root, bio, bbio, map_length,
  
        for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
                dev = bbio->stripes[dev_nr].dev;
 -              if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
 +              if (!dev || !dev->bdev ||
 +                  (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) {
                        bbio_error(bbio, first_bio, logical);
                        continue;
                }
                        bio = first_bio;
  
                submit_stripe_bio(root, bbio, bio,
 -                                bbio->stripes[dev_nr].physical, dev_nr, rw,
 +                                bbio->stripes[dev_nr].physical, dev_nr,
                                  async_submit);
        }
        btrfs_bio_counter_dec(root->fs_info);