Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author	Linus Torvalds <torvalds@linux-foundation.org>
Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: Make fsync sync new parent directories in no-journal mode
  ext4: Drop whitespace at end of lines
  ext4: Fix compat EXT4_IOC_ADD_GROUP
  ext4: Conditionally define compat ioctl numbers
  tracing: Convert more ext4 events to DEFINE_EVENT
  ext4: Add new tracepoints to track mballoc's buddy bitmap loads
  ext4: Add a missing trace hook
  ext4: restart ext4_ext_remove_space() after transaction restart
  ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
  ext4: Avoid crashing on NULL ptr dereference on a filesystem error
  ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
  ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE()
  ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks()
  ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks()
  ext4: Use our own write_cache_pages()
  ext4: Show journal_checksum option
  ext4: Fix for ext4_mb_collect_stats()
  ext4: check for a good block group before loading buddy pages
  ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate
  ext4: Remove extraneous newlines in ext4_msg() calls
  ...

Fixed up trivial conflict in fs/ext4/fsync.c

fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/xattr.c
fs/quota/dquot.c
include/linux/quotaops.h

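The largest piece of this pull is the ext4_map_blocks() abstraction that the fs/ext4/inode.c diff below converts callers to: instead of passing a throwaway buffer_head into ext4_get_blocks(), callers now fill in a small mapping descriptor. As a rough sketch only, reconstructed from the m_lblk/m_pblk/m_len/m_flags usage visible in the diff (the authoritative definition lives in fs/ext4/ext4.h):

    /* Sketch of the new mapping descriptor; field types inferred from usage. */
    struct ext4_map_blocks {
            ext4_fsblk_t m_pblk;    /* first physical block of the mapping */
            ext4_lblk_t m_lblk;     /* first logical block to map */
            unsigned int m_len;     /* number of blocks requested / mapped */
            unsigned int m_flags;   /* EXT4_MAP_NEW, _MAPPED, _UNWRITTEN, _BOUNDARY, ... */
    };

    int ext4_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map, int flags);

A positive return value is the number of blocks mapped, 0 means a hole, and a negative value is an error; the new _ext4_get_block() helper in the inode.c diff shows how the result is translated back into buffer_head state for the legacy get_block interface.
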
diff --combined fs/ext4/fsync.c
  
  #include <trace/events/ext4.h>
  
+ /*
+  * If we're not journaling and this is a just-created file, we have to
+  * sync our parent directory (if it was freshly created) since
+  * otherwise it will only be written by writeback, leaving a huge
+  * window during which a crash may lose the file.  This may apply for
+  * the parent directory's parent as well, and so on recursively, if
+  * they are also freshly created.
+  */
+ static void ext4_sync_parent(struct inode *inode)
+ {
+       struct dentry *dentry = NULL;
+       while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+               ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+               dentry = list_entry(inode->i_dentry.next,
+                                   struct dentry, d_alias);
+               if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+                       break;
+               inode = dentry->d_parent->d_inode;
+               sync_mapping_buffers(inode->i_mapping);
+       }
+ }
  /*
   * akpm: A new design for ext4_sync_file().
   *
@@@ -66,9 -89,13 +89,13 @@@ int ext4_sync_file(struct file *file, s
        ret = flush_completed_IO(inode);
        if (ret < 0)
                return ret;
-       
-       if (!journal)
-               return simple_fsync(file, dentry, datasync);
+       if (!journal) {
+               ret = simple_fsync(file, dentry, datasync);
+               if (!ret && !list_empty(&inode->i_dentry))
+                       ext4_sync_parent(inode);
+               return ret;
+       }
  
        /*
         * data=writeback,ordered:
                if (ext4_should_writeback_data(inode) &&
                    (journal->j_fs_dev != journal->j_dev) &&
                    (journal->j_flags & JBD2_BARRIER))
 -                      blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 +                      blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
 +                                      NULL, BLKDEV_IFL_WAIT);
-               jbd2_log_wait_commit(journal, commit_tid);
+               ret = jbd2_log_wait_commit(journal, commit_tid);
        } else if (journal->j_flags & JBD2_BARRIER)
 -              blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 +              blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
 +                      BLKDEV_IFL_WAIT);
        return ret;
  }
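
The ext4_sync_parent() path above targets the common pattern of creating a file inside a freshly made directory and fsync()ing only the file; in no-journal mode, without syncing the parent, the new directory entry could still be lost on a crash. A hypothetical userspace sequence illustrating the case this covers (not part of the patch):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
            int fd;

            mkdir("newdir", 0755);          /* freshly created parent directory */
            fd = open("newdir/file", O_CREAT | O_WRONLY, 0644);
            if (fd < 0)
                    return 1;
            write(fd, "data", 4);
            /* With this change, fsync() on a no-journal ext4 filesystem also
             * flushes the just-created parent, so "newdir/file" survives a crash. */
            fsync(fd);
            close(fd);
            return 0;
    }
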
diff --combined fs/ext4/ialloc.c
@@@ -240,56 -240,49 +240,49 @@@ void ext4_free_inode(handle_t *handle, 
        if (fatal)
                goto error_return;
  
-       /* Ok, now we can actually update the inode bitmaps.. */
-       cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-                                       bit, bitmap_bh->b_data);
-       if (!cleared)
-               ext4_error(sb, "bit already cleared for inode %lu", ino);
-       else {
-               gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       fatal = -ESRCH;
+       gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       if (gdp) {
                BUFFER_TRACE(bh2, "get_write_access");
                fatal = ext4_journal_get_write_access(handle, bh2);
-               if (fatal) goto error_return;
-               if (gdp) {
-                       ext4_lock_group(sb, block_group);
-                       count = ext4_free_inodes_count(sb, gdp) + 1;
-                       ext4_free_inodes_set(sb, gdp, count);
-                       if (is_directory) {
-                               count = ext4_used_dirs_count(sb, gdp) - 1;
-                               ext4_used_dirs_set(sb, gdp, count);
-                               if (sbi->s_log_groups_per_flex) {
-                                       ext4_group_t f;
-                                       f = ext4_flex_group(sbi, block_group);
-                                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
-                               }
+       }
+       ext4_lock_group(sb, block_group);
+       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+       if (fatal || !cleared) {
+               ext4_unlock_group(sb, block_group);
+               goto out;
+       }
  
-                       }
-                       gdp->bg_checksum = ext4_group_desc_csum(sbi,
-                                                       block_group, gdp);
-                       ext4_unlock_group(sb, block_group);
-                       percpu_counter_inc(&sbi->s_freeinodes_counter);
-                       if (is_directory)
-                               percpu_counter_dec(&sbi->s_dirs_counter);
-                       if (sbi->s_log_groups_per_flex) {
-                               ext4_group_t f;
-                               f = ext4_flex_group(sbi, block_group);
-                               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
-                       }
-               }
-               BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, NULL, bh2);
-               if (!fatal) fatal = err;
+       count = ext4_free_inodes_count(sb, gdp) + 1;
+       ext4_free_inodes_set(sb, gdp, count);
+       if (is_directory) {
+               count = ext4_used_dirs_count(sb, gdp) - 1;
+               ext4_used_dirs_set(sb, gdp, count);
+               percpu_counter_dec(&sbi->s_dirs_counter);
        }
-       BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-       if (!fatal)
-               fatal = err;
-       sb->s_dirt = 1;
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_unlock_group(sb, block_group);
+       percpu_counter_inc(&sbi->s_freeinodes_counter);
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t f = ext4_flex_group(sbi, block_group);
+               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+               if (is_directory)
+                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+       }
+       BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+       fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
+ out:
+       if (cleared) {
+               BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+               if (!fatal)
+                       fatal = err;
+               sb->s_dirt = 1;
+       } else
+               ext4_error(sb, "bit already cleared for inode %lu", ino);
  error_return:
        brelse(bitmap_bh);
        ext4_std_error(sb, fatal);
@@@ -499,7 -492,7 +492,7 @@@ static int find_group_orlov(struct supe
  
        if (S_ISDIR(mode) &&
            ((parent == sb->s_root->d_inode) ||
-            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
+            (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                int best_ndir = inodes_per_group;
                int ret = -1;
  
@@@ -979,12 -972,16 +972,12 @@@ got
                atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
        }
  
 -      inode->i_uid = current_fsuid();
 -      if (test_opt(sb, GRPID))
 +      if (test_opt(sb, GRPID)) {
 +              inode->i_mode = mode;
 +              inode->i_uid = current_fsuid();
                inode->i_gid = dir->i_gid;
 -      else if (dir->i_mode & S_ISGID) {
 -              inode->i_gid = dir->i_gid;
 -              if (S_ISDIR(mode))
 -                      mode |= S_ISGID;
        } else
 -              inode->i_gid = current_fsgid();
 -      inode->i_mode = mode;
 +              inode_init_owner(inode, dir, mode);
  
        inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
-                       EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
                        ext4_ext_tree_init(handle, inode);
                }
        }
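
The owner-initialization hunk above (at the "got" label) drops the open-coded uid/gid/setgid setup in favor of the new generic inode_init_owner() helper, keeping only the GRPID mount-option case open-coded. Based on the logic being removed in that hunk, the helper behaves roughly as sketched here; this is an approximation for readability, not a copy of the fs/inode.c implementation:

    /* Approximate semantics of inode_init_owner(), per the code it replaces. */
    static void sketch_inode_init_owner(struct inode *inode,
                                        const struct inode *dir, mode_t mode)
    {
            inode->i_uid = current_fsuid();
            if (dir && (dir->i_mode & S_ISGID)) {
                    inode->i_gid = dir->i_gid;
                    if (S_ISDIR(mode))
                            mode |= S_ISGID;        /* subdirectories inherit setgid */
            } else
                    inode->i_gid = current_fsgid();
            inode->i_mode = mode;
    }
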
diff --combined fs/ext4/inode.c
@@@ -149,7 -149,7 +149,7 @@@ int ext4_truncate_restart_trans(handle_
        int ret;
  
        /*
-        * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
+        * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
         * i_mutex. So we can safely drop the i_data_sem here.
@@@ -348,9 -348,8 +348,8 @@@ static int __ext4_check_blockref(const 
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
-                       __ext4_error(inode->i_sb, function,
-                                  "invalid block reference %u "
-                                  "in inode #%lu", blk, inode->i_ino);
+                       ext4_error_inode(function, inode,
+                                        "invalid block reference %u", blk);
                        return -EIO;
                }
        }
@@@ -785,7 -784,7 +784,7 @@@ failed
        /* Allocation failed, free what we already allocated */
        ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@@ -875,7 -874,7 +874,7 @@@ static int ext4_splice_branch(handle_t 
  
  err_out:
        for (i = 1; i <= num; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
  }
  
  /*
-  * The ext4_ind_get_blocks() function handles non-extents inodes
+  * The ext4_ind_map_blocks() function handles non-extents inodes
   * (i.e., using the traditional indirect/double-indirect i_blocks
-  * scheme) for ext4_get_blocks().
+  * scheme) for ext4_map_blocks().
   *
   * Allocation strategy is simple: if we have to allocate something, we will
   * have to go the whole way to leaf. So let's do it before attaching anything
   * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
   * blocks.
   */
- static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int maxblocks,
-                              struct buffer_head *bh_result,
+ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map,
                               int flags)
  {
        int err = -EIO;
        int count = 0;
        ext4_fsblk_t first_block = 0;
  
-       J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+       J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-       depth = ext4_block_to_path(inode, iblock, offsets,
+       depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);
  
        if (depth == 0)
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
-               clear_buffer_new(bh_result);
                count++;
                /*map more blocks*/
-               while (count < maxblocks && count <= blocks_to_boundary) {
+               while (count < map->m_len && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;
  
                        blk = le32_to_cpu(*(chain[depth-1].p + count));
        /*
         * Okay, we need to do block allocation.
        */
-       goal = ext4_find_goal(inode, iblock, partial);
+       goal = ext4_find_goal(inode, map->m_lblk, partial);
  
        /* the number of blocks need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
         * direct blocks to allocate for this branch.
         */
        count = ext4_blks_to_allocate(partial, indirect_blks,
-                                       maxblocks, blocks_to_boundary);
+                                     map->m_len, blocks_to_boundary);
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
                                &count, goal,
                                offsets + (partial - chain), partial);
  
         * may need to return -EAGAIN upwards in the worst case.  --sct
         */
        if (!err)
-               err = ext4_splice_branch(handle, inode, iblock,
+               err = ext4_splice_branch(handle, inode, map->m_lblk,
                                         partial, indirect_blks, count);
        if (err)
                goto cleanup;
  
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
  
        ext4_update_inode_fsync_trans(handle, inode, 1);
  got_it:
-       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = le32_to_cpu(chain[depth-1].key);
+       map->m_len = count;
        if (count > blocks_to_boundary)
-               set_buffer_boundary(bh_result);
+               map->m_flags |= EXT4_MAP_BOUNDARY;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
@@@ -1016,7 -1015,6 +1015,6 @@@ cleanup
                brelse(partial->bh);
                partial--;
        }
-       BUFFER_TRACE(bh_result, "returned");
  out:
        return err;
  }
@@@ -1061,7 -1059,7 +1059,7 @@@ static int ext4_indirect_calc_metadata_
   */
  static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
  {
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_calc_metadata_amount(inode, lblock);
  
        return ext4_indirect_calc_metadata_amount(inode, lblock);
@@@ -1076,7 -1074,6 +1074,6 @@@ void ext4_da_update_reserve_space(struc
  {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int mdb_free = 0, allocated_meta_blocks = 0;
  
        spin_lock(&ei->i_block_reservation_lock);
        trace_ext4_da_update_reserve_space(inode, used);
  
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
-       used += ei->i_allocated_meta_blocks;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       allocated_meta_blocks = ei->i_allocated_meta_blocks;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                          used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
  
        if (ei->i_reserved_data_blocks == 0) {
                /*
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               mdb_free = ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
        }
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  
-       /* Update quota subsystem */
-       if (quota_claim) {
+       /* Update quota subsystem for data blocks */
+       if (quota_claim)
                dquot_claim_block(inode, used);
-               if (mdb_free)
-                       dquot_release_reservation_block(inode, mdb_free);
-       } else {
+       else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
-                * not update the quota for allocated blocks. But then
-                * converting an fallocate region to initialized region would
-                * have caused a metadata allocation. So claim quota for
-                * that
+                * not re-claim the quota for fallocated blocks.
                 */
-               if (allocated_meta_blocks)
-                       dquot_claim_block(inode, allocated_meta_blocks);
-               dquot_release_reservation_block(inode, mdb_free + used);
+               dquot_release_reservation_block(inode, used);
        }
  
        /*
                ext4_discard_preallocations(inode);
  }
  
- static int check_block_validity(struct inode *inode, const char *msg,
-                               sector_t logical, sector_t phys, int len)
+ static int check_block_validity(struct inode *inode, const char *func,
+                               struct ext4_map_blocks *map)
  {
-       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-               __ext4_error(inode->i_sb, msg,
-                          "inode #%lu logical block %llu mapped to %llu "
-                          "(size %d)", inode->i_ino,
-                          (unsigned long long) logical,
-                          (unsigned long long) phys, len);
+       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+                                  map->m_len)) {
+               ext4_error_inode(func, inode,
+                          "lblock %lu mapped to illegal pblock %llu "
+                          "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_pblk, map->m_len);
                return -EIO;
        }
        return 0;
@@@ -1212,15 -1201,15 +1201,15 @@@ static pgoff_t ext4_num_dirty_pages(str
  }
  
  /*
-  * The ext4_get_blocks() function tries to look up the requested blocks,
+  * The ext4_map_blocks() function tries to look up the requested blocks,
   * and returns if the blocks are already mapped.
   *
   * Otherwise it takes the write lock of the i_data_sem and allocate blocks
   * and store the allocated blocks in the result buffer head and mark it
   * mapped.
   *
-  * If file type is extents based, it will call ext4_ext_get_blocks(),
-  * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
+  * If file type is extents based, it will call ext4_ext_map_blocks(),
+  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
   * based files
   *
   * On success, it returns the number of blocks being mapped or allocate.
   *
   * It returns the error in case of allocation failure.
   */
- int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-                   unsigned int max_blocks, struct buffer_head *bh,
-                   int flags)
+ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   struct ext4_map_blocks *map, int flags)
  {
        int retval;
  
-       clear_buffer_mapped(bh);
-       clear_buffer_unwritten(bh);
-       ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, flags, max_blocks,
-                 (unsigned long)block);
+       map->m_flags = 0;
+       ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
+                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
+                 (unsigned long) map->m_lblk);
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                               bh, 0);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-                                            bh, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
  
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system corruption",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode, __func__, map);
                if (ret != 0)
                        return ret;
        }
         * ext4_ext_get_block() returns with create = 0
         * with buffer head unmapped.
         */
-       if (retval > 0 && buffer_mapped(bh))
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                return retval;
  
        /*
         * of BH_Unwritten and BH_Mapped flags being simultaneously
         * set on the buffer_head.
         */
-       clear_buffer_unwritten(bh);
+       map->m_flags &= ~EXT4_MAP_UNWRITTEN;
  
        /*
         * New blocks allocate and/or writing to uninitialized extent
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
         */
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                             bh, flags);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, flags);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block,
-                                            max_blocks, bh, flags);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags);
  
-               if (retval > 0 && buffer_new(bh)) {
+               if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
                        /*
                         * We allocated new blocks which will result in
                         * i_data's format changing.  Force the migrate
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
  
        up_write((&EXT4_I(inode)->i_data_sem));
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system "
-                                              "corruption after allocation",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode,
+                                              "ext4_map_blocks_after_alloc",
+                                              map);
                if (ret != 0)
                        return ret;
        }
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  
- int ext4_get_block(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+ static int _ext4_get_block(struct inode *inode, sector_t iblock,
+                          struct buffer_head *bh, int flags)
  {
        handle_t *handle = ext4_journal_current_handle();
+       struct ext4_map_blocks map;
        int ret = 0, started = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        int dio_credits;
  
-       if (create && !handle) {
+       map.m_lblk = iblock;
+       map.m_len = bh->b_size >> inode->i_blkbits;
+       if (flags && !handle) {
                /* Direct IO write... */
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
                handle = ext4_journal_start(inode, dio_credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       goto out;
+                       return ret;
                }
                started = 1;
        }
  
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
+               map_bh(bh, inode->i_sb, map.m_pblk);
+               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
        }
        if (started)
                ext4_journal_stop(handle);
- out:
        return ret;
  }
  
+ int ext4_get_block(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh, int create)
+ {
+       return _ext4_get_block(inode, iblock, bh,
+                              create ? EXT4_GET_BLOCKS_CREATE : 0);
+ }
  /*
   * `handle' can be NULL if create is zero
   */
  struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                                ext4_lblk_t block, int create, int *errp)
  {
-       struct buffer_head dummy;
+       struct ext4_map_blocks map;
+       struct buffer_head *bh;
        int fatal = 0, err;
-       int flags = 0;
  
        J_ASSERT(handle != NULL || create == 0);
  
-       dummy.b_state = 0;
-       dummy.b_blocknr = -1000;
-       buffer_trace_init(&dummy.b_history);
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
-       /*
-        * ext4_get_blocks() returns number of blocks mapped. 0 in
-        * case of a HOLE.
-        */
-       if (err > 0) {
-               if (err > 1)
-                       WARN_ON(1);
-               err = 0;
+       map.m_lblk = block;
+       map.m_len = 1;
+       err = ext4_map_blocks(handle, inode, &map,
+                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       if (err < 0)
+               *errp = err;
+       if (err <= 0)
+               return NULL;
+       *errp = 0;
+       bh = sb_getblk(inode->i_sb, map.m_pblk);
+       if (!bh) {
+               *errp = -EIO;
+               return NULL;
        }
-       *errp = err;
-       if (!err && buffer_mapped(&dummy)) {
-               struct buffer_head *bh;
-               bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-               if (!bh) {
-                       *errp = -EIO;
-                       goto err;
-               }
-               if (buffer_new(&dummy)) {
-                       J_ASSERT(create != 0);
-                       J_ASSERT(handle != NULL);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               J_ASSERT(create != 0);
+               J_ASSERT(handle != NULL);
  
-                       /*
-                        * Now that we do not always journal data, we should
-                        * keep in mind whether this should always journal the
-                        * new buffer as metadata.  For now, regular file
-                        * writes use ext4_get_block instead, so it's not a
-                        * problem.
-                        */
-                       lock_buffer(bh);
-                       BUFFER_TRACE(bh, "call get_create_access");
-                       fatal = ext4_journal_get_create_access(handle, bh);
-                       if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
-                               set_buffer_uptodate(bh);
-                       }
-                       unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-                       err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (!fatal)
-                               fatal = err;
-               } else {
-                       BUFFER_TRACE(bh, "not a new buffer");
-               }
-               if (fatal) {
-                       *errp = fatal;
-                       brelse(bh);
-                       bh = NULL;
+               /*
+                * Now that we do not always journal data, we should
+                * keep in mind whether this should always journal the
+                * new buffer as metadata.  For now, regular file
+                * writes use ext4_get_block instead, so it's not a
+                * problem.
+                */
+               lock_buffer(bh);
+               BUFFER_TRACE(bh, "call get_create_access");
+               fatal = ext4_journal_get_create_access(handle, bh);
+               if (!fatal && !buffer_uptodate(bh)) {
+                       memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+                       set_buffer_uptodate(bh);
                }
-               return bh;
+               unlock_buffer(bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
+               if (!fatal)
+                       fatal = err;
+       } else {
+               BUFFER_TRACE(bh, "not a new buffer");
        }
- err:
-       return NULL;
+       if (fatal) {
+               *errp = fatal;
+               brelse(bh);
+               bh = NULL;
+       }
+       return bh;
  }
  
  struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
@@@ -1860,7 -1841,7 +1841,7 @@@ static int ext4_da_reserve_space(struc
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed, md_reserved;
+       unsigned long md_needed;
        int ret;
  
        /*
         */
  repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_reserved = ei->i_reserved_meta_blocks;
        md_needed = ext4_calc_metadata_amount(inode, lblock);
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
  
        /*
-        * Make quota reservation here to prevent quota overflow
-        * later. Real quota accounting is done at pages writeout
-        * time.
+        * We will charge metadata quota at writeout time; this saves
+        * us from metadata over-estimation, though we may go over by
+        * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, md_needed + 1);
+       ret = dquot_reserve_block(inode, 1);
        if (ret)
                return ret;
+       /*
+        * We do still charge estimated metadata to the sb though;
+        * we cannot afford to run out of free blocks.
+        */
        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-               dquot_release_reservation_block(inode, md_needed + 1);
+               dquot_release_reservation_block(inode, 1);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@@ -1910,6 -1893,7 +1893,7 @@@ static void ext4_da_release_space(struc
  
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
  
+       trace_ext4_da_release_space(inode, to_free);
        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
                 * if there aren't enough reserved blocks, then the
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               to_free += ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
  
-       /* update fs dirty blocks counter */
+       /* update fs dirty data blocks counter */
        percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
  
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@@ -2042,28 -2027,23 +2027,23 @@@ static int mpage_da_submit_io(struct mp
  /*
   * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
   *
-  * @mpd->inode - inode to walk through
-  * @exbh->b_blocknr - first block on a disk
-  * @exbh->b_size - amount of space in bytes
-  * @logical - first logical block to start assignment with
-  *
   * the function goes through all passed space and put actual disk
   * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
   */
- static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
-                                struct buffer_head *exbh)
+ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
+                                struct ext4_map_blocks *map)
  {
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
-       int blocks = exbh->b_size >> inode->i_blkbits;
-       sector_t pblock = exbh->b_blocknr, cur_logical;
+       int blocks = map->m_len;
+       sector_t pblock = map->m_pblk, cur_logical;
        struct buffer_head *head, *bh;
        pgoff_t index, end;
        struct pagevec pvec;
        int nr_pages, i;
  
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
        cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  
        pagevec_init(&pvec, 0);
  
                        /* skip blocks out of the range */
                        do {
-                               if (cur_logical >= logical)
+                               if (cur_logical >= map->m_lblk)
                                        break;
                                cur_logical++;
                        } while ((bh = bh->b_this_page) != head);
  
                        do {
-                               if (cur_logical >= logical + blocks)
+                               if (cur_logical >= map->m_lblk + blocks)
                                        break;
  
-                               if (buffer_delay(bh) ||
-                                               buffer_unwritten(bh)) {
+                               if (buffer_delay(bh) || buffer_unwritten(bh)) {
  
                                        BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
  
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
  
-                               if (buffer_uninit(exbh))
+                               if (map->m_flags & EXT4_MAP_UNINIT)
                                        set_buffer_uninit(bh);
                                cur_logical++;
                                pblock++;
  }
  
  
- /*
-  * __unmap_underlying_blocks - just a helper function to unmap
-  * set of blocks described by @bh
-  */
- static inline void __unmap_underlying_blocks(struct inode *inode,
-                                            struct buffer_head *bh)
- {
-       struct block_device *bdev = inode->i_sb->s_bdev;
-       int blocks, i;
-       blocks = bh->b_size >> inode->i_blkbits;
-       for (i = 0; i < blocks; i++)
-               unmap_underlying_metadata(bdev, bh->b_blocknr + i);
- }
  static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
                                        sector_t logical, long blk_cnt)
  {
@@@ -2206,7 -2170,7 +2170,7 @@@ static void ext4_print_free_blocks(stru
  static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  {
        int err, blks, get_blocks_flags;
-       struct buffer_head new;
+       struct ext4_map_blocks map;
        sector_t next = mpd->b_blocknr;
        unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
        loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
         * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
         * variables are updated after the blocks have been allocated.
         */
-       new.b_state = 0;
+       map.m_lblk = next;
+       map.m_len = max_blocks;
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
        if (ext4_should_dioread_nolock(mpd->inode))
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (mpd->b_state & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
  
-       blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-                              &new, get_blocks_flags);
+       blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
                err = blks;
                /*
                ext4_msg(mpd->inode->i_sb, KERN_CRIT,
                         "delayed block allocation failed for inode %lu at "
                         "logical offset %llu with max blocks %zd with "
-                        "error %d\n", mpd->inode->i_ino,
+                        "error %d", mpd->inode->i_ino,
                         (unsigned long long) next,
                         mpd->b_size >> mpd->inode->i_blkbits, err);
                printk(KERN_CRIT "This should not happen!!  "
        }
        BUG_ON(blks == 0);
  
-       new.b_size = (blks << mpd->inode->i_blkbits);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               struct block_device *bdev = mpd->inode->i_sb->s_bdev;
+               int i;
  
-       if (buffer_new(&new))
-               __unmap_underlying_blocks(mpd->inode, &new);
+               for (i = 0; i < map.m_len; i++)
+                       unmap_underlying_metadata(bdev, map.m_pblk + i);
+       }
  
        /*
         * If blocks are delayed marked, we need to
         */
        if ((mpd->b_state & (1 << BH_Delay)) ||
            (mpd->b_state & (1 << BH_Unwritten)))
-               mpage_put_bnr_to_bhs(mpd, next, &new);
+               mpage_put_bnr_to_bhs(mpd, &map);
  
        if (ext4_should_order_data(mpd->inode)) {
                err = ext4_jbd2_file_inode(handle, mpd->inode);
@@@ -2349,8 -2316,17 +2316,17 @@@ static void mpage_add_bh_to_extent(stru
        sector_t next;
        int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
  
+       /*
+        * XXX Don't go larger than mballoc is willing to allocate
+        * This is a stopgap solution.  We eventually need to fold
+        * mpage_da_submit_io() into this function and then call
+        * ext4_get_blocks() multiple times in a loop
+        */
+       if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+               goto flush_it;
        /* check if the reserved journal credits might overflow */
-       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
                        /*
                         * With non-extent format we are limited by the journal
@@@ -2423,17 -2399,6 +2399,6 @@@ static int __mpage_da_writepage(struct 
        struct buffer_head *bh, *head;
        sector_t logical;
  
-       if (mpd->io_done) {
-               /*
-                * Rest of the page in the page_vec
-                * redirty then and skip then. We will
-                * try to write them again after
-                * starting a new transaction
-                */
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return MPAGE_DA_EXTENT_TAIL;
-       }
        /*
         * Can we merge this page to current extent?
         */
   * initialized properly.
   */
  static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
-                                 struct buffer_head *bh_result, int create)
+                                 struct buffer_head *bh, int create)
  {
+       struct ext4_map_blocks map;
        int ret = 0;
        sector_t invalid_block = ~((sector_t) 0xffff);
  
                invalid_block = ~0;
  
        BUG_ON(create == 0);
-       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+       BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+       map.m_lblk = iblock;
+       map.m_len = 1;
  
        /*
         * first, we need to know whether the block is allocated already
         * preallocated blocks are unmapped but should treated
         * the same as allocated blocks.
         */
-       ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
-       if ((ret == 0) && !buffer_delay(bh_result)) {
-               /* the block isn't (pre)allocated yet, let's reserve space */
+       ret = ext4_map_blocks(NULL, inode, &map, 0);
+       if (ret < 0)
+               return ret;
+       if (ret == 0) {
+               if (buffer_delay(bh))
+                       return 0; /* Not sure this could or should happen */
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
                        /* not enough space to reserve */
                        return ret;
  
-               map_bh(bh_result, inode->i_sb, invalid_block);
-               set_buffer_new(bh_result);
-               set_buffer_delay(bh_result);
-       } else if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               if (buffer_unwritten(bh_result)) {
-                       /* A delayed write to unwritten bh should
-                        * be marked new and mapped.  Mapped ensures
-                        * that we don't do get_block multiple times
-                        * when we write to the same offset and new
-                        * ensures that we do proper zero out for
-                        * partial write.
-                        */
-                       set_buffer_new(bh_result);
-                       set_buffer_mapped(bh_result);
-               }
-               ret = 0;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+               return 0;
        }
  
-       return ret;
+       map_bh(bh, inode->i_sb, map.m_pblk);
+       bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+       if (buffer_unwritten(bh)) {
+               /* A delayed write to unwritten bh should be marked
+                * new and mapped.  Mapped ensures that we don't do
+                * get_block multiple times when we write to the same
+                * offset and new ensures that we do proper zero out
+                * for partial write.
+                */
+               set_buffer_new(bh);
+               set_buffer_mapped(bh);
+       }
+       return 0;
  }
  
  /*
  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
  {
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-       /*
-        * we don't want to do block allocation in writepage
-        * so call get_block_wrap with create = 0
-        */
-       ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result, 0);
  }
  
  static int bget_one(handle_t *handle, struct buffer_head *bh)
@@@ -2821,13 -2780,131 +2780,131 @@@ static int ext4_da_writepages_trans_blo
         * number of contiguous block. So we will limit
         * number of contiguous block to a sane value
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
  
        return ext4_chunk_trans_blocks(inode, max_blocks);
  }
  
+ /*
+  * write_cache_pages_da - walk the list of dirty pages of the given
+  * address space and call the callback function (which usually writes
+  * the pages).
+  *
+  * This is a forked version of write_cache_pages().  Differences:
+  *    Range cyclic is ignored.
+  *    no_nrwrite_index_update is always presumed true
+  */
+ static int write_cache_pages_da(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               struct mpage_da_data *mpd)
+ {
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       long nr_to_write = wbc->nr_to_write;
+       pagevec_init(&pvec, 0);
+       index = wbc->range_start >> PAGE_CACHE_SHIFT;
+       end = wbc->range_end >> PAGE_CACHE_SHIFT;
+       while (!done && (index <= end)) {
+               int i;
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                             PAGECACHE_TAG_DIRTY,
+                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       /*
+                        * At this point, the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or
+                        * even swizzled back from swapper_space to tmpfs file
+                        * mapping. However, page->index will not change
+                        * because we have a reference on the page.
+                        */
+                       if (page->index > end) {
+                               done = 1;
+                               break;
+                       }
+                       lock_page(page);
+                       /*
+                        * Page truncated or invalidated. We can freely skip it
+                        * then, even for data integrity operations: the page
+                        * has disappeared concurrently, so there could be no
+                        * real expectation of this data integrity operation
+                        * even if there is now a new, dirty page at the same
+                        * pagecache address.
+                        */
+                       if (unlikely(page->mapping != mapping)) {
+ continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+                       if (PageWriteback(page)) {
+                               if (wbc->sync_mode != WB_SYNC_NONE)
+                                       wait_on_page_writeback(page);
+                               else
+                                       goto continue_unlock;
+                       }
+                       BUG_ON(PageWriteback(page));
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
+                       ret = __mpage_da_writepage(page, wbc, mpd);
+                       if (unlikely(ret)) {
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                               } else {
+                                       done = 1;
+                                       break;
+                               }
+                       }
+                       if (nr_to_write > 0) {
+                               nr_to_write--;
+                               if (nr_to_write == 0 &&
+                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                       /*
+                                        * We stop writing back only if we are
+                                        * not doing integrity sync. In case of
+                                        * integrity sync we have to keep going
+                                        * because someone may be concurrently
+                                        * dirtying pages, and we might have
+                                        * synced a lot of newly appeared dirty
+                                        * pages, but have not synced all of the
+                                        * old dirty pages.
+                                        */
+                                       done = 1;
+                                       break;
+                               }
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return ret;
+ }
  static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
  {
        handle_t *handle = NULL;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
-       int no_nrwrite_index_update;
        int pages_written = 0;
        long pages_skipped;
        unsigned int max_pages;
        mpd.wbc = wbc;
        mpd.inode = mapping->host;
  
-       /*
-        * we don't want write_cache_pages to update
-        * nr_to_write and writeback_index
-        */
-       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
-       wbc->no_nrwrite_index_update = 1;
        pages_skipped = wbc->pages_skipped;
  
  retry:
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
-                              "%ld pages, ino %lu; err %d\n", __func__,
+                              "%ld pages, ino %lu; err %d", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        goto out_writepages;
                }
                mpd.io_done = 0;
                mpd.pages_written = 0;
                mpd.retval = 0;
-               ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
-                                       &mpd);
+               ret = write_cache_pages_da(mapping, wbc, &mpd);
                /*
                 * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
        if (pages_skipped != wbc->pages_skipped)
                ext4_msg(inode->i_sb, KERN_CRIT,
                         "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d\n",
+                        "with nr_to_write = %ld ret = %d",
                         __func__, wbc->nr_to_write, ret);
  
        /* Update index */
                mapping->writeback_index = index;
  
  out_writepages:
-       if (!no_nrwrite_index_update)
-               wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
@@@ -3076,7 -3143,7 +3143,7 @@@ static int ext4_da_write_begin(struct f
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
  {
-       int ret, retries = 0, quota_retries = 0;
+       int ret, retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
@@@ -3135,22 -3202,6 +3202,6 @@@ retry
  
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
-       if ((ret == -EDQUOT) &&
-           EXT4_I(inode)->i_reserved_meta_blocks &&
-           (quota_retries++ < 3)) {
-               /*
-                * Since we often over-estimate the number of meta
-                * data blocks required, we may sometimes get a
-                * spurios out of quota error even though there would
-                * be enough space once we write the data blocks and
-                * find out how many meta data blocks were _really_
-                * required.  So try forcing the inode write to see if
-                * that helps.
-                */
-               write_inode_now(inode, (quota_retries == 3));
-               goto retry;
-       }
  out:
        return ret;
  }
        return ret;
  }
  
+ /*
+  * ext4_get_block used when preparing for a DIO write or buffer write.
+  * We allocate an uninitialized extent if blocks haven't been allocated.
+  * The extent will be converted to initialized after the IO is complete.
+  */
  static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
  {
-       handle_t *handle = ext4_journal_current_handle();
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-       int dio_credits;
-       int started = 0;
        ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       /*
-        * ext4_get_block in prepare for a DIO write or buffer write.
-        * We allocate an uinitialized extent if blocks haven't been allocated.
-        * The extent will be converted to initialized after IO complete.
-        */
-       create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
-       if (!handle) {
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-               handle = ext4_journal_start(inode, dio_credits);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
-               started = 1;
-       }
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       if (started)
-               ext4_journal_stop(handle);
- out:
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result,
+                              EXT4_GET_BLOCKS_IO_CREATE_EXT);
  }
  
  static void dump_completed_IO(struct inode * inode)
@@@ -3973,7 -3996,7 +3996,7 @@@ static ssize_t ext4_direct_IO(int rw, s
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
  
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
  
        return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
@@@ -4302,10 -4325,9 +4325,9 @@@ static int ext4_clear_blocks(handle_t *
  
        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
-               ext4_error(inode->i_sb, "inode #%lu: "
-                          "attempt to clear blocks %llu len %lu, invalid",
-                          inode->i_ino, (unsigned long long) block_to_free,
-                          count);
+               EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+                                "blocks %llu len %lu",
+                                (unsigned long long) block_to_free, count);
                return 1;
        }
  
@@@ -4410,11 -4432,10 +4432,10 @@@ static void ext4_free_data(handle_t *ha
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
-                       ext4_error(inode->i_sb,
-                                  "circular indirect block detected, "
-                                  "inode=%lu, block=%llu",
-                                  inode->i_ino,
-                                  (unsigned long long) this_bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                                        "circular indirect block detected at "
+                                        "block %llu",
+                               (unsigned long long) this_bh->b_blocknr);
        }
  }
  
@@@ -4452,11 -4473,10 +4473,10 @@@ static void ext4_free_branches(handle_
  
                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
-                               ext4_error(inode->i_sb,
-                                          "indirect mapped block in inode "
-                                          "#%lu invalid (level %d, blk #%lu)",
-                                          inode->i_ino, depth,
-                                          (unsigned long) nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "invalid indirect mapped "
+                                                "block %lu (level %d)",
+                                                (unsigned long) nr, depth);
                                break;
                        }
  
                         * (should be rare).
                         */
                        if (!bh) {
-                               ext4_error(inode->i_sb,
-                                          "Read failure, inode=%lu, block=%llu",
-                                          inode->i_ino, nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "Read failure block=%llu",
+                                                (unsigned long long) nr);
                                continue;
                        }
  
@@@ -4612,12 -4632,12 +4632,12 @@@ void ext4_truncate(struct inode *inode
        if (!ext4_can_truncate(inode))
                return;
  
-       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
  
        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
  
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
                return;
        }
@@@ -4785,8 -4805,8 +4805,8 @@@ static int __ext4_get_inode_loc(struct 
  
        bh = sb_getblk(sb, block);
        if (!bh) {
-               ext4_error(sb, "unable to read inode block - "
-                          "inode=%lu, block=%llu", inode->i_ino, block);
+               EXT4_ERROR_INODE(inode, "unable to read inode block - "
+                                "block %llu", block);
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@@ -4884,8 -4904,8 +4904,8 @@@ make_io
                submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       ext4_error(sb, "unable to read inode block - inode=%lu,"
-                                  " block=%llu", inode->i_ino, block);
+                       EXT4_ERROR_INODE(inode, "unable to read inode "
+                                        "block %llu", block);
                        brelse(bh);
                        return -EIO;
                }
@@@ -5096,8 -5116,8 +5116,8 @@@ struct inode *ext4_iget(struct super_bl
        ret = 0;
        if (ei->i_file_acl &&
            !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-               ext4_error(sb, "bad extended attribute block %llu inode #%lu",
-                          ei->i_file_acl, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+                                ei->i_file_acl);
                ret = -EIO;
                goto bad_inode;
        } else if (ei->i_flags & EXT4_EXTENTS_FL) {
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
                ret = -EIO;
-               ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
-                          inode->i_mode, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
        brelse(iloc.bh);
@@@ -5381,9 -5400,9 +5400,9 @@@ int ext4_write_inode(struct inode *inod
                if (wbc->sync_mode == WB_SYNC_ALL)
                        sync_dirty_buffer(iloc.bh);
                if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       ext4_error(inode->i_sb, "IO error syncing inode, "
-                                  "inode=%lu, block=%llu", inode->i_ino,
-                                  (unsigned long long)iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                               "IO error syncing inode (block=%llu)",
+                               (unsigned long long) iloc.bh->b_blocknr);
                        err = -EIO;
                }
                brelse(iloc.bh);
@@@ -5425,7 -5444,7 +5444,7 @@@ int ext4_setattr(struct dentry *dentry
        if (error)
                return error;
  
 -      if (ia_valid & ATTR_SIZE)
 +      if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
        if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
                (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
        }
  
        if (attr->ia_valid & ATTR_SIZE) {
-               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  
                        if (attr->ia_size > sbi->s_bitmap_maxbytes) {
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE &&
            (attr->ia_size < inode->i_size ||
-            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
+            (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
                handle_t *handle;
  
                handle = ext4_journal_start(inode, 3);
                        }
                }
                /* ext4_truncate will clear the flag */
-               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+               if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
                        ext4_truncate(inode);
        }
  
@@@ -5576,7 -5595,7 +5595,7 @@@ static int ext4_indirect_trans_blocks(s
  
  static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  {
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
  }
@@@ -5911,9 -5930,9 +5930,9 @@@ int ext4_change_inode_journal_flag(stru
         */
  
        if (val)
-               EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else
-               EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        ext4_set_aops(inode);
  
        jbd2_journal_unlock_updates(journal);
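
The inode.c hunks above convert open-coded flag tests such as EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL into calls to ext4_test_inode_flag(), ext4_set_inode_flag() and ext4_clear_inode_flag(). As a rough sketch only (the authoritative definitions live in fs/ext4/ext4.h), helpers of that shape can be built directly on the kernel bitops, assuming i_flags in struct ext4_inode_info is an unsigned long and the EXT4_INODE_* values are plain bit numbers:

#include <linux/bitops.h>

/*
 * Sketch, not the in-tree definitions: bit numbers assumed to mirror the
 * on-disk flag positions (EXT4_EXTENTS_FL = 1 << 19, EXT4_EOFBLOCKS_FL =
 * 1 << 22); EXT4_I() is ext4's accessor from struct inode to ext4_inode_info.
 */
enum {
	EXT4_INODE_EXTENTS   = 19,	/* inode uses extents */
	EXT4_INODE_EOFBLOCKS = 22,	/* blocks allocated beyond EOF */
};

static inline int ext4_test_inode_flag(struct inode *inode, int bit)
{
	return test_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_set_inode_flag(struct inode *inode, int bit)
{
	set_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
{
	clear_bit(bit, &EXT4_I(inode)->i_flags);
}

Atomic bitops let concurrent updaters flip individual flags without losing each other's changes, something an open-coded read-modify-write of i_flags cannot guarantee.
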
diff --combined fs/ext4/xattr.c
@@@ -97,7 -97,7 +97,7 @@@ static int ext4_xattr_list(struct dentr
  
  static struct mb_cache *ext4_xattr_cache;
  
 -static struct xattr_handler *ext4_xattr_handler_map[] = {
 +static const struct xattr_handler *ext4_xattr_handler_map[] = {
        [EXT4_XATTR_INDEX_USER]              = &ext4_xattr_user_handler,
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext4_xattr_acl_access_handler,
  #endif
  };
  
 -struct xattr_handler *ext4_xattr_handlers[] = {
 +const struct xattr_handler *ext4_xattr_handlers[] = {
        &ext4_xattr_user_handler,
        &ext4_xattr_trusted_handler,
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        NULL
  };
  
 -static inline struct xattr_handler *
 +static inline const struct xattr_handler *
  ext4_xattr_handler(int name_index)
  {
 -      struct xattr_handler *handler = NULL;
 +      const struct xattr_handler *handler = NULL;
  
        if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
                handler = ext4_xattr_handler_map[name_index];
@@@ -228,9 -228,8 +228,8 @@@ ext4_xattr_block_get(struct inode *inod
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
  bad_block:
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@@ -332,7 -331,7 +331,7 @@@ ext4_xattr_list_entries(struct dentry *
        size_t rest = buffer_size;
  
        for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 -              struct xattr_handler *handler =
 +              const struct xattr_handler *handler =
                        ext4_xattr_handler(entry->e_name_index);
  
                if (handler) {
@@@ -372,9 -371,8 +371,8 @@@ ext4_xattr_block_list(struct dentry *de
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@@ -666,8 -664,8 +664,8 @@@ ext4_xattr_block_find(struct inode *ino
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
                if (ext4_xattr_check_block(bs->bh)) {
-                       ext4_error(sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@@ -820,7 -818,7 +818,7 @@@ inserted
                                                EXT4_I(inode)->i_block_group);
  
                        /* non-extent files can't have physical blocks past 2^32 */
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
  
                        block = ext4_new_meta_blocks(handle, inode,
                        if (error)
                                goto cleanup;
  
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
  
                        ea_idebug(inode, "creating block %d", block);
@@@ -880,8 -878,8 +878,8 @@@ cleanup_dquot
        goto cleanup;
  
  bad_block:
-       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+       EXT4_ERROR_INODE(inode, "bad block %llu",
+                        EXT4_I(inode)->i_file_acl);
        goto cleanup;
  
  #undef header
@@@ -1194,8 -1192,8 +1192,8 @@@ retry
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@@ -1372,14 -1370,14 +1370,14 @@@ ext4_xattr_delete_inode(handle_t *handl
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-               ext4_error(inode->i_sb, "inode %lu: block %llu read error",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "block %llu read error",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        ext4_xattr_release_block(handle, inode, bh);
@@@ -1504,9 -1502,8 +1502,8 @@@ again
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext4_error(inode->i_sb,
-                               "inode %lu: block %lu read error",
-                               inode->i_ino, (unsigned long) ce->e_block);
+                       EXT4_ERROR_INODE(inode, "block %lu read error",
+                                        (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
                                EXT4_XATTR_REFCOUNT_MAX) {
                        ea_idebug(inode, "block %lu refcount %d>=%d",
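
Both the inode.c and xattr.c hunks replace ext4_error(sb, "inode %lu: ...", inode->i_ino, ...) call sites with EXT4_ERROR_INODE(inode, ...), which folds the inode number into the message automatically. Purely as a hypothetical sketch built on the ext4_error() call form visible in the removed lines (the real macro in fs/ext4/ext4.h may differ, for instance by also recording the calling function):

/* Sketch only; not the in-tree definition. */
#define EXT4_ERROR_INODE(inode, fmt, a...)				\
	ext4_error((inode)->i_sb, "inode #%lu: " fmt,			\
		   (inode)->i_ino, ## a)

Centralizing the "inode #%lu" prefix keeps the individual error strings short and makes the reports uniform across call sites.
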
diff --combined fs/quota/dquot.c
@@@ -82,7 -82,7 +82,7 @@@
  
  /*
   * There are three quota SMP locks. dq_list_lock protects all lists with quotas
 - * and quota formats, dqstats structure containing statistics about the lists
 + * and quota formats.
   * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
   * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
   * i_blocks and i_bytes updates themselves are guarded by i_lock acquired directly
@@@ -132,9 -132,7 +132,9 @@@ static __cacheline_aligned_in_smp DEFIN
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
  EXPORT_SYMBOL(dq_data_lock);
  
 +#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
  static char *quotatypes[] = INITQFNAMES;
 +#endif
  static struct quota_format_type *quota_formats;       /* List of registered formats */
  static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
  
@@@ -228,10 -226,6 +228,10 @@@ static struct hlist_head *dquot_hash
  
  struct dqstats dqstats;
  EXPORT_SYMBOL(dqstats);
 +#ifdef CONFIG_SMP
 +struct dqstats *dqstats_pcpu;
 +EXPORT_SYMBOL(dqstats_pcpu);
 +#endif
  
  static qsize_t inode_get_rsv_space(struct inode *inode);
  static void __dquot_initialize(struct inode *inode, int type);
@@@ -279,7 -273,7 +279,7 @@@ static struct dquot *find_dquot(unsigne
  static inline void put_dquot_last(struct dquot *dquot)
  {
        list_add_tail(&dquot->dq_free, &free_dquots);
 -      dqstats.free_dquots++;
 +      dqstats_inc(DQST_FREE_DQUOTS);
  }
  
  static inline void remove_free_dquot(struct dquot *dquot)
        if (list_empty(&dquot->dq_free))
                return;
        list_del_init(&dquot->dq_free);
 -      dqstats.free_dquots--;
 +      dqstats_dec(DQST_FREE_DQUOTS);
  }
  
  static inline void put_inuse(struct dquot *dquot)
        /* We add to the back of inuse list so we don't have to restart
         * when traversing this list and we block */
        list_add_tail(&dquot->dq_inuse, &inuse_list);
 -      dqstats.allocated_dquots++;
 +      dqstats_inc(DQST_ALLOC_DQUOTS);
  }
  
  static inline void remove_inuse(struct dquot *dquot)
  {
 -      dqstats.allocated_dquots--;
 +      dqstats_dec(DQST_ALLOC_DQUOTS);
        list_del(&dquot->dq_inuse);
  }
  /*
@@@ -323,23 -317,14 +323,23 @@@ static inline int mark_dquot_dirty(stru
        return dquot->dq_sb->dq_op->mark_dirty(dquot);
  }
  
 +/* Mark dquot dirty in an atomic manner, and return its old dirty flag state */
  int dquot_mark_dquot_dirty(struct dquot *dquot)
  {
 +      int ret = 1;
 +
 +      /* If quota is dirty already, we don't have to acquire dq_list_lock */
 +      if (test_bit(DQ_MOD_B, &dquot->dq_flags))
 +              return 1;
 +
        spin_lock(&dq_list_lock);
 -      if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags))
 +      if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
                list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
                                info[dquot->dq_type].dqi_dirty_list);
 +              ret = 0;
 +      }
        spin_unlock(&dq_list_lock);
 -      return 0;
 +      return ret;
  }
  EXPORT_SYMBOL(dquot_mark_dquot_dirty);
  
@@@ -565,8 -550,8 +565,8 @@@ int dquot_scan_active(struct super_bloc
                        continue;
                /* Now we have active dquot so we can just increase use count */
                atomic_inc(&dquot->dq_count);
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_LOOKUPS);
                dqput(old_dquot);
                old_dquot = dquot;
                ret = fn(dquot, priv);
@@@ -611,8 -596,8 +611,8 @@@ int vfs_quota_sync(struct super_block *
                         * holding reference so we can safely just increase
                         * use count */
                        atomic_inc(&dquot->dq_count);
 -                      dqstats.lookups++;
                        spin_unlock(&dq_list_lock);
 +                      dqstats_inc(DQST_LOOKUPS);
                        sb->dq_op->write_dquot(dquot);
                        dqput(dquot);
                        spin_lock(&dq_list_lock);
                if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
                    && info_dirty(&dqopt->info[cnt]))
                        sb->dq_op->write_info(sb, cnt);
 -      spin_lock(&dq_list_lock);
 -      dqstats.syncs++;
 -      spin_unlock(&dq_list_lock);
 +      dqstats_inc(DQST_SYNCS);
        mutex_unlock(&dqopt->dqonoff_mutex);
  
        if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@@ -676,22 -663,6 +676,22 @@@ static void prune_dqcache(int count
        }
  }
  
 +static int dqstats_read(unsigned int type)
 +{
 +      int count = 0;
 +#ifdef CONFIG_SMP
 +      int cpu;
 +      for_each_possible_cpu(cpu)
 +              count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
 +      /* Statistics reading is racy, but absolute accuracy isn't required */
 +      if (count < 0)
 +              count = 0;
 +#else
 +      count = dqstats.stat[type];
 +#endif
 +      return count;
 +}
 +
  /*
   * This is called from kswapd when we think we need some
   * more memory
@@@ -704,7 -675,7 +704,7 @@@ static int shrink_dqcache_memory(int nr
                prune_dqcache(nr);
                spin_unlock(&dq_list_lock);
        }
 -      return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
 +      return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
  }
  
  static struct shrinker dqcache_shrinker = {
@@@ -732,7 -703,10 +732,7 @@@ void dqput(struct dquot *dquot
                BUG();
        }
  #endif
 -      
 -      spin_lock(&dq_list_lock);
 -      dqstats.drops++;
 -      spin_unlock(&dq_list_lock);
 +      dqstats_inc(DQST_DROPS);
  we_slept:
        spin_lock(&dq_list_lock);
        if (atomic_read(&dquot->dq_count) > 1) {
@@@ -849,15 -823,15 +849,15 @@@ we_slept
                put_inuse(dquot);
                /* hash it first so it can be found */
                insert_dquot_hash(dquot);
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_LOOKUPS);
        } else {
                if (!atomic_read(&dquot->dq_count))
                        remove_free_dquot(dquot);
                atomic_inc(&dquot->dq_count);
 -              dqstats.cache_hits++;
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_CACHE_HITS);
 +              dqstats_inc(DQST_LOOKUPS);
        }
        /* Wait for dq_lock - after this we know that either dquot_release() is
         * already finished or it will be canceled due to dq_count > 1 test */
@@@ -1514,11 -1488,13 +1514,13 @@@ static void inode_decr_space(struct ino
  /*
   * This operation can block, but only after everything is updated
   */
- int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
  {
        int cnt, ret = 0;
        char warntype[MAXQUOTAS];
+       int warn = flags & DQUOT_SPACE_WARN;
+       int reserve = flags & DQUOT_SPACE_RESERVE;
+       int nofail = flags & DQUOT_SPACE_NOFAIL;
  
        /*
         * First test before acquiring mutex - solves deadlocks when we
                        continue;
                ret = check_bdq(inode->i_dquot[cnt], number, !warn,
                                warntype+cnt);
-               if (ret) {
+               if (ret && !nofail) {
                        spin_unlock(&dq_data_lock);
                        goto out_flush_warn;
                }
@@@ -1638,10 -1614,11 +1640,11 @@@ EXPORT_SYMBOL(dquot_claim_space_nodirty
  /*
   * This operation can block, but only after everything is updated
   */
- void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
  {
        unsigned int cnt;
        char warntype[MAXQUOTAS];
+       int reserve = flags & DQUOT_SPACE_RESERVE;
  
        /* First test before acquiring mutex - solves deadlocks when we
           * re-enter the quota code and are already holding the mutex */
@@@ -1703,19 -1680,16 +1706,19 @@@ EXPORT_SYMBOL(dquot_free_inode)
  
  /*
   * Transfer the number of inodes and blocks from one diskquota to another.
 + * On success, dquot references in transfer_to are consumed and references
 + * to original dquots that need to be released are placed there. On failure,
 + * references are kept untouched.
   *
   * This operation can block, but only after everything is updated
   * A transaction must be started when entering this function.
 + *
   */
 -static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
  {
        qsize_t space, cur_space;
        qsize_t rsv_space = 0;
 -      struct dquot *transfer_from[MAXQUOTAS];
 -      struct dquot *transfer_to[MAXQUOTAS];
 +      struct dquot *transfer_from[MAXQUOTAS] = {};
        int cnt, ret = 0;
        char warntype_to[MAXQUOTAS];
        char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
        if (IS_NOQUOTA(inode))
                return 0;
        /* Initialize the arrays */
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 -              transfer_from[cnt] = NULL;
 -              transfer_to[cnt] = NULL;
 +      for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                warntype_to[cnt] = QUOTA_NL_NOWARN;
 -      }
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 -              if (mask & (1 << cnt))
 -                      transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
 -      }
        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
        if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
                up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 -              goto put_all;
 +              return 0;
        }
        spin_lock(&dq_data_lock);
        cur_space = inode_get_bytes(inode);
  
        mark_all_dquot_dirty(transfer_from);
        mark_all_dquot_dirty(transfer_to);
 -      /* The reference we got is transferred to the inode */
 +      /* Pass back references to put */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 -              transfer_to[cnt] = NULL;
 -warn_put_all:
 +              transfer_to[cnt] = transfer_from[cnt];
 +warn:
        flush_warnings(transfer_to, warntype_to);
        flush_warnings(transfer_from, warntype_from_inodes);
        flush_warnings(transfer_from, warntype_from_space);
 -put_all:
 -      dqput_all(transfer_from);
 -      dqput_all(transfer_to);
        return ret;
  over_quota:
        spin_unlock(&dq_data_lock);
        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 -      /* Clear dquot pointers we don't want to dqput() */
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 -              transfer_from[cnt] = NULL;
 -      goto warn_put_all;
 +      goto warn;
  }
 +EXPORT_SYMBOL(__dquot_transfer);
  
  /* Wrapper for transferring ownership of an inode for uid/gid only
   * Called from FSXXX_setattr()
   */
  int dquot_transfer(struct inode *inode, struct iattr *iattr)
  {
 -      qid_t chid[MAXQUOTAS];
 -      unsigned long mask = 0;
 +      struct dquot *transfer_to[MAXQUOTAS] = {};
 +      struct super_block *sb = inode->i_sb;
 +      int ret;
  
 -      if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
 -              mask |= 1 << USRQUOTA;
 -              chid[USRQUOTA] = iattr->ia_uid;
 -      }
 -      if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
 -              mask |= 1 << GRPQUOTA;
 -              chid[GRPQUOTA] = iattr->ia_gid;
 -      }
 -      if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 -              dquot_initialize(inode);
 -              return __dquot_transfer(inode, chid, mask);
 -      }
 -      return 0;
 +      if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
 +              return 0;
 +
 +      if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
 +              transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
 +      if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
 +              transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
 +
 +      ret = __dquot_transfer(inode, transfer_to);
 +      dqput_all(transfer_to);
 +      return ret;
  }
  EXPORT_SYMBOL(dquot_transfer);
  
@@@ -2291,30 -2278,25 +2294,30 @@@ static inline qsize_t stoqb(qsize_t spa
  }
  
  /* Generic routine for getting common part of quota structure */
 -static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 +static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
  {
        struct mem_dqblk *dm = &dquot->dq_dqb;
  
 +      memset(di, 0, sizeof(*di));
 +      di->d_version = FS_DQUOT_VERSION;
 +      di->d_flags = dquot->dq_type == USRQUOTA ?
 +                      XFS_USER_QUOTA : XFS_GROUP_QUOTA;
 +      di->d_id = dquot->dq_id;
 +
        spin_lock(&dq_data_lock);
 -      di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
 -      di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
 -      di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
 -      di->dqb_ihardlimit = dm->dqb_ihardlimit;
 -      di->dqb_isoftlimit = dm->dqb_isoftlimit;
 -      di->dqb_curinodes = dm->dqb_curinodes;
 -      di->dqb_btime = dm->dqb_btime;
 -      di->dqb_itime = dm->dqb_itime;
 -      di->dqb_valid = QIF_ALL;
 +      di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
 +      di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
 +      di->d_ino_hardlimit = dm->dqb_ihardlimit;
 +      di->d_ino_softlimit = dm->dqb_isoftlimit;
 +      di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
 +      di->d_icount = dm->dqb_curinodes;
 +      di->d_btimer = dm->dqb_btime;
 +      di->d_itimer = dm->dqb_itime;
        spin_unlock(&dq_data_lock);
  }
  
  int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 -                struct if_dqblk *di)
 +                struct fs_disk_quota *di)
  {
        struct dquot *dquot;
  
  }
  EXPORT_SYMBOL(vfs_get_dqblk);
  
 +#define VFS_FS_DQ_MASK \
 +      (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
 +       FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
 +       FS_DQ_BTIMER | FS_DQ_ITIMER)
 +
  /* Generic routine for setting common part of quota structure */
 -static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 +static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
  {
        struct mem_dqblk *dm = &dquot->dq_dqb;
        int check_blim = 0, check_ilim = 0;
        struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
  
 -      if ((di->dqb_valid & QIF_BLIMITS &&
 -           (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
 -            di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
 -          (di->dqb_valid & QIF_ILIMITS &&
 -           (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
 -            di->dqb_isoftlimit > dqi->dqi_maxilimit)))
 +      if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
 +              return -EINVAL;
 +
 +      if (((di->d_fieldmask & FS_DQ_BSOFT) &&
 +           (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
 +          ((di->d_fieldmask & FS_DQ_BHARD) &&
 +           (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
 +          ((di->d_fieldmask & FS_DQ_ISOFT) &&
 +           (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
 +          ((di->d_fieldmask & FS_DQ_IHARD) &&
 +           (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
                return -ERANGE;
  
        spin_lock(&dq_data_lock);
 -      if (di->dqb_valid & QIF_SPACE) {
 -              dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
 +      if (di->d_fieldmask & FS_DQ_BCOUNT) {
 +              dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_BLIMITS) {
 -              dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 -              dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 +
 +      if (di->d_fieldmask & FS_DQ_BSOFT)
 +              dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
 +      if (di->d_fieldmask & FS_DQ_BHARD)
 +              dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
 +      if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_INODES) {
 -              dm->dqb_curinodes = di->dqb_curinodes;
 +
 +      if (di->d_fieldmask & FS_DQ_ICOUNT) {
 +              dm->dqb_curinodes = di->d_icount;
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_ILIMITS) {
 -              dm->dqb_isoftlimit = di->dqb_isoftlimit;
 -              dm->dqb_ihardlimit = di->dqb_ihardlimit;
 +
 +      if (di->d_fieldmask & FS_DQ_ISOFT)
 +              dm->dqb_isoftlimit = di->d_ino_softlimit;
 +      if (di->d_fieldmask & FS_DQ_IHARD)
 +              dm->dqb_ihardlimit = di->d_ino_hardlimit;
 +      if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_BTIME) {
 -              dm->dqb_btime = di->dqb_btime;
 +
 +      if (di->d_fieldmask & FS_DQ_BTIMER) {
 +              dm->dqb_btime = di->d_btimer;
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_ITIME) {
 -              dm->dqb_itime = di->dqb_itime;
 +
 +      if (di->d_fieldmask & FS_DQ_ITIMER) {
 +              dm->dqb_itime = di->d_itimer;
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
        }
                    dm->dqb_curspace < dm->dqb_bsoftlimit) {
                        dm->dqb_btime = 0;
                        clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 -              } else if (!(di->dqb_valid & QIF_BTIME))
 +              } else if (!(di->d_fieldmask & FS_DQ_BTIMER))
                        /* Set grace only if user hasn't provided his own... */
                        dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
        }
                    dm->dqb_curinodes < dm->dqb_isoftlimit) {
                        dm->dqb_itime = 0;
                        clear_bit(DQ_INODES_B, &dquot->dq_flags);
 -              } else if (!(di->dqb_valid & QIF_ITIME))
 +              } else if (!(di->d_fieldmask & FS_DQ_ITIMER))
                        /* Set grace only if user hasn't provided his own... */
                        dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
        }
  }
  
  int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
 -                struct if_dqblk *di)
 +                struct fs_disk_quota *di)
  {
        struct dquot *dquot;
        int rc;
@@@ -2505,74 -2468,62 +2508,74 @@@ const struct quotactl_ops vfs_quotactl_
        .set_dqblk      = vfs_set_dqblk
  };
  
 +
 +static int do_proc_dqstats(struct ctl_table *table, int write,
 +                   void __user *buffer, size_t *lenp, loff_t *ppos)
 +{
 +#ifdef CONFIG_SMP
 +      /* Update global table */
 +      unsigned int type = (int *)table->data - dqstats.stat;
 +      dqstats.stat[type] = dqstats_read(type);
 +#endif
 +      return proc_dointvec(table, write, buffer, lenp, ppos);
 +}
 +
  static ctl_table fs_dqstats_table[] = {
        {
                .procname       = "lookups",
 -              .data           = &dqstats.lookups,
 +              .data           = &dqstats.stat[DQST_LOOKUPS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "drops",
 -              .data           = &dqstats.drops,
 +              .data           = &dqstats.stat[DQST_DROPS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "reads",
 -              .data           = &dqstats.reads,
 +              .data           = &dqstats.stat[DQST_READS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "writes",
 -              .data           = &dqstats.writes,
 +              .data           = &dqstats.stat[DQST_WRITES],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "cache_hits",
 -              .data           = &dqstats.cache_hits,
 +              .data           = &dqstats.stat[DQST_CACHE_HITS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "allocated_dquots",
 -              .data           = &dqstats.allocated_dquots,
 +              .data           = &dqstats.stat[DQST_ALLOC_DQUOTS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "free_dquots",
 -              .data           = &dqstats.free_dquots,
 +              .data           = &dqstats.stat[DQST_FREE_DQUOTS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "syncs",
 -              .data           = &dqstats.syncs,
 +              .data           = &dqstats.stat[DQST_SYNCS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
  #ifdef CONFIG_PRINT_QUOTA_WARNING
        {
@@@ -2624,13 -2575,6 +2627,13 @@@ static int __init dquot_init(void
        if (!dquot_hash)
                panic("Cannot create dquot hash table");
  
 +#ifdef CONFIG_SMP
 +      dqstats_pcpu = alloc_percpu(struct dqstats);
 +      if (!dqstats_pcpu)
 +              panic("Cannot create dquot stats table");
 +#endif
 +      memset(&dqstats, 0, sizeof(struct dqstats));
 +
        /* Find power-of-two hlist_heads which can fit into allocation */
        nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
        dq_hash_bits = 0;
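
The dquot.c changes above stop bumping the statistics counters under dq_list_lock (dqstats.lookups++, dqstats.drops++, ...) and use dqstats_inc()/dqstats_dec() instead, with dqstats_read() summing the per-CPU copies allocated in dquot_init(). A rough sketch of SMP helpers consistent with that read side, assuming struct dqstats carries the stat[] array used above (the actual helpers belong in include/linux/quota.h and may be implemented differently):

#include <linux/percpu.h>

#ifdef CONFIG_SMP
/* Sketch: lockless per-CPU bump; dqstats_read() tolerates racy totals. */
static inline void dqstats_inc(unsigned int type)
{
	per_cpu_ptr(dqstats_pcpu, get_cpu())->stat[type]++;
	put_cpu();
}

static inline void dqstats_dec(unsigned int type)
{
	per_cpu_ptr(dqstats_pcpu, get_cpu())->stat[type]--;
	put_cpu();
}
#else
static inline void dqstats_inc(unsigned int type)
{
	dqstats.stat[type]++;
}

static inline void dqstats_dec(unsigned int type)
{
	dqstats.stat[type]--;
}
#endif

Dropping dq_list_lock from these hot paths is what allows call sites such as dqget() and dqput() to move the counter updates outside the spinlock, as the hunks above do.
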
diff --combined include/linux/quotaops.h
@@@ -9,19 -9,15 +9,23 @@@
  
  #include <linux/fs.h>
  
+ #define DQUOT_SPACE_WARN      0x1
+ #define DQUOT_SPACE_RESERVE   0x2
+ #define DQUOT_SPACE_NOFAIL    0x4
  static inline struct quota_info *sb_dqopt(struct super_block *sb)
  {
        return &sb->s_dquot;
  }
  
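 +/* i_mutex must be held */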
 +static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 +{
 +      return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
 +              (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
 +              (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
 +}
 +
  #if defined(CONFIG_QUOTA)
  
  /*
@@@ -41,9 -37,8 +45,8 @@@ int dquot_scan_active(struct super_bloc
  struct dquot *dquot_alloc(struct super_block *sb, int type);
  void dquot_destroy(struct dquot *dquot);
  
- int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve);
- void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
+ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
+ void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
  
  int dquot_alloc_inode(const struct inode *inode);
  
@@@ -71,12 -66,9 +74,12 @@@ int vfs_quota_disable(struct super_bloc
  int vfs_quota_sync(struct super_block *sb, int type, int wait);
  int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
  int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 +              struct fs_disk_quota *di);
 +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
 +              struct fs_disk_quota *di);
  
 +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
  int dquot_transfer(struct inode *inode, struct iattr *iattr);
  int vfs_dq_quota_on_remount(struct super_block *sb);
  
@@@ -242,17 -234,17 +245,17 @@@ static inline int dquot_transfer(struc
  }
  
  static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+               int flags)
  {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_add_bytes(inode, number);
        return 0;
  }
  
  static inline void __dquot_free_space(struct inode *inode, qsize_t number,
-               int reserve)
+               int flags)
  {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_sub_bytes(inode, number);
  }
  
@@@ -268,7 -260,13 +271,13 @@@ static inline int dquot_claim_space_nod
  
  static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr, 1, 0);
+       return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN);
+ }
+ static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
+ {
+       __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
+       mark_inode_dirty(inode);
  }
  
  static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@@ -286,6 -284,11 +295,11 @@@ static inline int dquot_alloc_block_nod
        return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
  }
  
+ static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr)
+ {
+       dquot_alloc_space_nofail(inode, nr << inode->i_blkbits);
+ }
  static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
  {
        return dquot_alloc_space(inode, nr << inode->i_blkbits);
  
  static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0);
  }
  
  static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
  
  static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits,
+                               DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
  }
  
  static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
@@@ -345,7 -349,7 +360,7 @@@ static inline void dquot_free_block(str
  static inline void dquot_release_reservation_block(struct inode *inode,
                qsize_t nr)
  {
-       __dquot_free_space(inode, nr << inode->i_blkbits, 1);
+       __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE);
  }
  
  #endif /* _LINUX_QUOTAOPS_ */
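
With the warn/reserve parameters folded into a single flags argument, filesystems are expected to go through the static inline wrappers above and only spell out DQUOT_SPACE_* bits for unusual combinations. A short usage sketch (the surrounding function is illustrative, not taken from any particular filesystem):

#include <linux/quotaops.h>

/* Illustrative delayed-allocation style flow built on the wrappers above. */
static int example_charge_blocks(struct inode *inode, qsize_t nr_blocks)
{
	int ret;

	/*
	 * Reserve quota for nr_blocks without touching i_blocks yet; this
	 * expands to __dquot_alloc_space(inode, bytes,
	 * DQUOT_SPACE_WARN | DQUOT_SPACE_RESERVE).
	 */
	ret = dquot_reserve_block(inode, nr_blocks);
	if (ret)
		return ret;

	/* ... allocate the blocks on disk ... */

	/* Convert the in-memory reservation into a real allocation. */
	return dquot_claim_block(inode, nr_blocks);
}

Callers that must not fail can use dquot_alloc_block_nofail(), which adds DQUOT_SPACE_NOFAIL so the over-quota check in __dquot_alloc_space() no longer aborts the allocation.
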