Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author	Linus Torvalds <torvalds@linux-foundation.org>
Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: Make fsync sync new parent directories in no-journal mode
  ext4: Drop whitespace at end of lines
  ext4: Fix compat EXT4_IOC_ADD_GROUP
  ext4: Conditionally define compat ioctl numbers
  tracing: Convert more ext4 events to DEFINE_EVENT
  ext4: Add new tracepoints to track mballoc's buddy bitmap loads
  ext4: Add a missing trace hook
  ext4: restart ext4_ext_remove_space() after transaction restart
  ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
  ext4: Avoid crashing on NULL ptr dereference on a filesystem error
  ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
  ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE()
  ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks()
  ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks()
  ext4: Use our own write_cache_pages()
  ext4: Show journal_checksum option
  ext4: Fix for ext4_mb_collect_stats()
  ext4: check for a good block group before loading buddy pages
  ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate
  ext4: Remove extraneous newlines in ext4_msg() calls
  ...

Fixed up trivial conflict in fs/ext4/fsync.c

fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/xattr.c
fs/quota/dquot.c
include/linux/quotaops.h

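The largest piece of this pull is the ext4_map_blocks() abstraction that the fs/ext4/inode.c diff below converts callers to: instead of passing a throwaway buffer_head into ext4_get_blocks(), callers now fill in a small mapping descriptor. As a rough sketch only, reconstructed from the m_lblk/m_pblk/m_len/m_flags usage visible in the diff (the authoritative definition lives in fs/ext4/ext4.h):

    /* Sketch of the new mapping descriptor; field types inferred from usage. */
    struct ext4_map_blocks {
            ext4_fsblk_t m_pblk;    /* first physical block of the mapping */
            ext4_lblk_t m_lblk;     /* first logical block to map */
            unsigned int m_len;     /* number of blocks requested / mapped */
            unsigned int m_flags;   /* EXT4_MAP_NEW, _MAPPED, _UNWRITTEN, _BOUNDARY, ... */
    };

    int ext4_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map, int flags);

A positive return value is the number of blocks mapped, 0 means a hole, and a negative value is an error; the new _ext4_get_block() helper in the inode.c diff shows how the result is translated back into buffer_head state for the legacy get_block interface.
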
diff --combined fs/ext4/fsync.c
  
  #include <trace/events/ext4.h>
  
+ /*
+  * If we're not journaling and this is a just-created file, we have to
+  * sync our parent directory (if it was freshly created) since
+  * otherwise it will only be written by writeback, leaving a huge
+  * window during which a crash may lose the file.  This may apply for
+  * the parent directory's parent as well, and so on recursively, if
+  * they are also freshly created.
+  */
+ static void ext4_sync_parent(struct inode *inode)
+ {
+       struct dentry *dentry = NULL;
+       while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+               ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+               dentry = list_entry(inode->i_dentry.next,
+                                   struct dentry, d_alias);
+               if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+                       break;
+               inode = dentry->d_parent->d_inode;
+               sync_mapping_buffers(inode->i_mapping);
+       }
+ }
  /*
   * akpm: A new design for ext4_sync_file().
   *
@@@ -66,9 -89,13 +89,13 @@@ int ext4_sync_file(struct file *file, s
        ret = flush_completed_IO(inode);
        if (ret < 0)
                return ret;
-       
-       if (!journal)
-               return simple_fsync(file, dentry, datasync);
+       if (!journal) {
+               ret = simple_fsync(file, dentry, datasync);
+               if (!ret && !list_empty(&inode->i_dentry))
+                       ext4_sync_parent(inode);
+               return ret;
+       }
  
        /*
         * data=writeback,ordered:
                if (ext4_should_writeback_data(inode) &&
                    (journal->j_fs_dev != journal->j_dev) &&
                    (journal->j_flags & JBD2_BARRIER))
 -                      blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 +                      blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
 +                                      NULL, BLKDEV_IFL_WAIT);
-               jbd2_log_wait_commit(journal, commit_tid);
+               ret = jbd2_log_wait_commit(journal, commit_tid);
        } else if (journal->j_flags & JBD2_BARRIER)
 -              blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 +              blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
 +                      BLKDEV_IFL_WAIT);
        return ret;
  }
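
The ext4_sync_parent() path above targets the common pattern of creating a file inside a freshly made directory and fsync()ing only the file; in no-journal mode, without syncing the parent, the new directory entry could still be lost on a crash. A hypothetical userspace sequence illustrating the case this covers (not part of the patch):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
            int fd;

            mkdir("newdir", 0755);          /* freshly created parent directory */
            fd = open("newdir/file", O_CREAT | O_WRONLY, 0644);
            if (fd < 0)
                    return 1;
            write(fd, "data", 4);
            /* With this change, fsync() on a no-journal ext4 filesystem also
             * flushes the just-created parent, so "newdir/file" survives a crash. */
            fsync(fd);
            close(fd);
            return 0;
    }
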
diff --combined fs/ext4/ialloc.c
@@@ -240,56 -240,49 +240,49 @@@ void ext4_free_inode(handle_t *handle, 
        if (fatal)
                goto error_return;
  
-       /* Ok, now we can actually update the inode bitmaps.. */
-       cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-                                       bit, bitmap_bh->b_data);
-       if (!cleared)
-               ext4_error(sb, "bit already cleared for inode %lu", ino);
-       else {
-               gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       fatal = -ESRCH;
+       gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       if (gdp) {
                BUFFER_TRACE(bh2, "get_write_access");
                fatal = ext4_journal_get_write_access(handle, bh2);
-               if (fatal) goto error_return;
-               if (gdp) {
-                       ext4_lock_group(sb, block_group);
-                       count = ext4_free_inodes_count(sb, gdp) + 1;
-                       ext4_free_inodes_set(sb, gdp, count);
-                       if (is_directory) {
-                               count = ext4_used_dirs_count(sb, gdp) - 1;
-                               ext4_used_dirs_set(sb, gdp, count);
-                               if (sbi->s_log_groups_per_flex) {
-                                       ext4_group_t f;
-                                       f = ext4_flex_group(sbi, block_group);
-                                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
-                               }
+       }
+       ext4_lock_group(sb, block_group);
+       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+       if (fatal || !cleared) {
+               ext4_unlock_group(sb, block_group);
+               goto out;
+       }
  
-                       }
-                       gdp->bg_checksum = ext4_group_desc_csum(sbi,
-                                                       block_group, gdp);
-                       ext4_unlock_group(sb, block_group);
-                       percpu_counter_inc(&sbi->s_freeinodes_counter);
-                       if (is_directory)
-                               percpu_counter_dec(&sbi->s_dirs_counter);
-                       if (sbi->s_log_groups_per_flex) {
-                               ext4_group_t f;
-                               f = ext4_flex_group(sbi, block_group);
-                               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
-                       }
-               }
-               BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, NULL, bh2);
-               if (!fatal) fatal = err;
+       count = ext4_free_inodes_count(sb, gdp) + 1;
+       ext4_free_inodes_set(sb, gdp, count);
+       if (is_directory) {
+               count = ext4_used_dirs_count(sb, gdp) - 1;
+               ext4_used_dirs_set(sb, gdp, count);
+               percpu_counter_dec(&sbi->s_dirs_counter);
        }
-       BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-       if (!fatal)
-               fatal = err;
-       sb->s_dirt = 1;
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_unlock_group(sb, block_group);
+       percpu_counter_inc(&sbi->s_freeinodes_counter);
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t f = ext4_flex_group(sbi, block_group);
+               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+               if (is_directory)
+                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+       }
+       BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+       fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
+ out:
+       if (cleared) {
+               BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+               if (!fatal)
+                       fatal = err;
+               sb->s_dirt = 1;
+       } else
+               ext4_error(sb, "bit already cleared for inode %lu", ino);
  error_return:
        brelse(bitmap_bh);
        ext4_std_error(sb, fatal);
@@@ -499,7 -492,7 +492,7 @@@ static int find_group_orlov(struct supe
  
        if (S_ISDIR(mode) &&
            ((parent == sb->s_root->d_inode) ||
-            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
+            (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                int best_ndir = inodes_per_group;
                int ret = -1;
  
@@@ -979,12 -972,16 +972,12 @@@ got
                atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
        }
  
 -      inode->i_uid = current_fsuid();
 -      if (test_opt(sb, GRPID))
 +      if (test_opt(sb, GRPID)) {
 +              inode->i_mode = mode;
 +              inode->i_uid = current_fsuid();
                inode->i_gid = dir->i_gid;
 -      else if (dir->i_mode & S_ISGID) {
 -              inode->i_gid = dir->i_gid;
 -              if (S_ISDIR(mode))
 -                      mode |= S_ISGID;
        } else
 -              inode->i_gid = current_fsgid();
 -      inode->i_mode = mode;
 +              inode_init_owner(inode, dir, mode);
  
        inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
-                       EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
                        ext4_ext_tree_init(handle, inode);
                }
        }
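
The owner-initialization hunk above (at the "got" label) drops the open-coded uid/gid/setgid setup in favor of the new generic inode_init_owner() helper, keeping only the GRPID mount-option case open-coded. Based on the logic being removed in that hunk, the helper behaves roughly as sketched here; this is an approximation for readability, not a copy of the fs/inode.c implementation:

    /* Approximate semantics of inode_init_owner(), per the code it replaces. */
    static void sketch_inode_init_owner(struct inode *inode,
                                        const struct inode *dir, mode_t mode)
    {
            inode->i_uid = current_fsuid();
            if (dir && (dir->i_mode & S_ISGID)) {
                    inode->i_gid = dir->i_gid;
                    if (S_ISDIR(mode))
                            mode |= S_ISGID;        /* subdirectories inherit setgid */
            } else
                    inode->i_gid = current_fsgid();
            inode->i_mode = mode;
    }
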
diff --combined fs/ext4/inode.c
@@@ -149,7 -149,7 +149,7 @@@ int ext4_truncate_restart_trans(handle_
        int ret;
  
        /*
-        * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
+        * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
         * i_mutex. So we can safely drop the i_data_sem here.
@@@ -348,9 -348,8 +348,8 @@@ static int __ext4_check_blockref(const 
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
-                       __ext4_error(inode->i_sb, function,
-                                  "invalid block reference %u "
-                                  "in inode #%lu", blk, inode->i_ino);
+                       ext4_error_inode(function, inode,
+                                        "invalid block reference %u", blk);
                        return -EIO;
                }
        }
@@@ -785,7 -784,7 +784,7 @@@ failed
        /* Allocation failed, free what we already allocated */
        ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@@ -875,7 -874,7 +874,7 @@@ static int ext4_splice_branch(handle_t 
  
  err_out:
        for (i = 1; i <= num; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
  }
  
  /*
-  * The ext4_ind_get_blocks() function handles non-extents inodes
+  * The ext4_ind_map_blocks() function handles non-extents inodes
   * (i.e., using the traditional indirect/double-indirect i_blocks
-  * scheme) for ext4_get_blocks().
+  * scheme) for ext4_map_blocks().
   *
   * Allocation strategy is simple: if we have to allocate something, we will
   * have to go the whole way to leaf. So let's do it before attaching anything
   * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
   * blocks.
   */
- static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int maxblocks,
-                              struct buffer_head *bh_result,
+ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map,
                               int flags)
  {
        int err = -EIO;
        int count = 0;
        ext4_fsblk_t first_block = 0;
  
-       J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+       J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-       depth = ext4_block_to_path(inode, iblock, offsets,
+       depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);
  
        if (depth == 0)
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
-               clear_buffer_new(bh_result);
                count++;
                /*map more blocks*/
-               while (count < maxblocks && count <= blocks_to_boundary) {
+               while (count < map->m_len && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;
  
                        blk = le32_to_cpu(*(chain[depth-1].p + count));
        /*
         * Okay, we need to do block allocation.
        */
-       goal = ext4_find_goal(inode, iblock, partial);
+       goal = ext4_find_goal(inode, map->m_lblk, partial);
  
        /* the number of blocks need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
         * direct blocks to allocate for this branch.
         */
        count = ext4_blks_to_allocate(partial, indirect_blks,
-                                       maxblocks, blocks_to_boundary);
+                                     map->m_len, blocks_to_boundary);
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
                                &count, goal,
                                offsets + (partial - chain), partial);
  
         * may need to return -EAGAIN upwards in the worst case.  --sct
         */
        if (!err)
-               err = ext4_splice_branch(handle, inode, iblock,
+               err = ext4_splice_branch(handle, inode, map->m_lblk,
                                         partial, indirect_blks, count);
        if (err)
                goto cleanup;
  
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
  
        ext4_update_inode_fsync_trans(handle, inode, 1);
  got_it:
-       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = le32_to_cpu(chain[depth-1].key);
+       map->m_len = count;
        if (count > blocks_to_boundary)
-               set_buffer_boundary(bh_result);
+               map->m_flags |= EXT4_MAP_BOUNDARY;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
@@@ -1016,7 -1015,6 +1015,6 @@@ cleanup
                brelse(partial->bh);
                partial--;
        }
-       BUFFER_TRACE(bh_result, "returned");
  out:
        return err;
  }
@@@ -1061,7 -1059,7 +1059,7 @@@ static int ext4_indirect_calc_metadata_
   */
  static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
  {
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_calc_metadata_amount(inode, lblock);
  
        return ext4_indirect_calc_metadata_amount(inode, lblock);
@@@ -1076,7 -1074,6 +1074,6 @@@ void ext4_da_update_reserve_space(struc
  {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int mdb_free = 0, allocated_meta_blocks = 0;
  
        spin_lock(&ei->i_block_reservation_lock);
        trace_ext4_da_update_reserve_space(inode, used);
  
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
-       used += ei->i_allocated_meta_blocks;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       allocated_meta_blocks = ei->i_allocated_meta_blocks;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                          used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
  
        if (ei->i_reserved_data_blocks == 0) {
                /*
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               mdb_free = ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
        }
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  
-       /* Update quota subsystem */
-       if (quota_claim) {
+       /* Update quota subsystem for data blocks */
+       if (quota_claim)
                dquot_claim_block(inode, used);
-               if (mdb_free)
-                       dquot_release_reservation_block(inode, mdb_free);
-       } else {
+       else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
-                * not update the quota for allocated blocks. But then
-                * converting an fallocate region to initialized region would
-                * have caused a metadata allocation. So claim quota for
-                * that
+                * not re-claim the quota for fallocated blocks.
                 */
-               if (allocated_meta_blocks)
-                       dquot_claim_block(inode, allocated_meta_blocks);
-               dquot_release_reservation_block(inode, mdb_free + used);
+               dquot_release_reservation_block(inode, used);
        }
  
        /*
                ext4_discard_preallocations(inode);
  }
  
- static int check_block_validity(struct inode *inode, const char *msg,
-                               sector_t logical, sector_t phys, int len)
+ static int check_block_validity(struct inode *inode, const char *func,
+                               struct ext4_map_blocks *map)
  {
-       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-               __ext4_error(inode->i_sb, msg,
-                          "inode #%lu logical block %llu mapped to %llu "
-                          "(size %d)", inode->i_ino,
-                          (unsigned long long) logical,
-                          (unsigned long long) phys, len);
+       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+                                  map->m_len)) {
+               ext4_error_inode(func, inode,
+                          "lblock %lu mapped to illegal pblock %llu "
+                          "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_pblk, map->m_len);
                return -EIO;
        }
        return 0;
@@@ -1212,15 -1201,15 +1201,15 @@@ static pgoff_t ext4_num_dirty_pages(str
  }
  
  /*
-  * The ext4_get_blocks() function tries to look up the requested blocks,
+  * The ext4_map_blocks() function tries to look up the requested blocks,
   * and returns if the blocks are already mapped.
   *
   * Otherwise it takes the write lock of the i_data_sem and allocate blocks
   * and store the allocated blocks in the result buffer head and mark it
   * mapped.
   *
-  * If file type is extents based, it will call ext4_ext_get_blocks(),
-  * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
+  * If file type is extents based, it will call ext4_ext_map_blocks(),
+  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
   * based files
   *
   * On success, it returns the number of blocks being mapped or allocate.
   *
   * It returns the error in case of allocation failure.
   */
- int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-                   unsigned int max_blocks, struct buffer_head *bh,
-                   int flags)
+ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   struct ext4_map_blocks *map, int flags)
  {
        int retval;
  
-       clear_buffer_mapped(bh);
-       clear_buffer_unwritten(bh);
-       ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, flags, max_blocks,
-                 (unsigned long)block);
+       map->m_flags = 0;
+       ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
+                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
+                 (unsigned long) map->m_lblk);
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                               bh, 0);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-                                            bh, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
  
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system corruption",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode, __func__, map);
                if (ret != 0)
                        return ret;
        }
         * ext4_ext_get_block() returns with create = 0
         * with buffer head unmapped.
         */
-       if (retval > 0 && buffer_mapped(bh))
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                return retval;
  
        /*
         * of BH_Unwritten and BH_Mapped flags being simultaneously
         * set on the buffer_head.
         */
-       clear_buffer_unwritten(bh);
+       map->m_flags &= ~EXT4_MAP_UNWRITTEN;
  
        /*
         * New blocks allocate and/or writing to uninitialized extent
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
         */
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                             bh, flags);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, flags);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block,
-                                            max_blocks, bh, flags);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags);
  
-               if (retval > 0 && buffer_new(bh)) {
+               if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
                        /*
                         * We allocated new blocks which will result in
                         * i_data's format changing.  Force the migrate
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
  
        up_write((&EXT4_I(inode)->i_data_sem));
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system "
-                                              "corruption after allocation",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode,
+                                              "ext4_map_blocks_after_alloc",
+                                              map);
                if (ret != 0)
                        return ret;
        }
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  
- int ext4_get_block(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+ static int _ext4_get_block(struct inode *inode, sector_t iblock,
+                          struct buffer_head *bh, int flags)
  {
        handle_t *handle = ext4_journal_current_handle();
+       struct ext4_map_blocks map;
        int ret = 0, started = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        int dio_credits;
  
-       if (create && !handle) {
+       map.m_lblk = iblock;
+       map.m_len = bh->b_size >> inode->i_blkbits;
+       if (flags && !handle) {
                /* Direct IO write... */
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
                handle = ext4_journal_start(inode, dio_credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       goto out;
+                       return ret;
                }
                started = 1;
        }
  
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
+               map_bh(bh, inode->i_sb, map.m_pblk);
+               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
        }
        if (started)
                ext4_journal_stop(handle);
- out:
        return ret;
  }
  
+ int ext4_get_block(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh, int create)
+ {
+       return _ext4_get_block(inode, iblock, bh,
+                              create ? EXT4_GET_BLOCKS_CREATE : 0);
+ }
  /*
   * `handle' can be NULL if create is zero
   */
  struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                                ext4_lblk_t block, int create, int *errp)
  {
-       struct buffer_head dummy;
+       struct ext4_map_blocks map;
+       struct buffer_head *bh;
        int fatal = 0, err;
-       int flags = 0;
  
        J_ASSERT(handle != NULL || create == 0);
  
-       dummy.b_state = 0;
-       dummy.b_blocknr = -1000;
-       buffer_trace_init(&dummy.b_history);
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
-       /*
-        * ext4_get_blocks() returns number of blocks mapped. 0 in
-        * case of a HOLE.
-        */
-       if (err > 0) {
-               if (err > 1)
-                       WARN_ON(1);
-               err = 0;
+       map.m_lblk = block;
+       map.m_len = 1;
+       err = ext4_map_blocks(handle, inode, &map,
+                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       if (err < 0)
+               *errp = err;
+       if (err <= 0)
+               return NULL;
+       *errp = 0;
+       bh = sb_getblk(inode->i_sb, map.m_pblk);
+       if (!bh) {
+               *errp = -EIO;
+               return NULL;
        }
-       *errp = err;
-       if (!err && buffer_mapped(&dummy)) {
-               struct buffer_head *bh;
-               bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-               if (!bh) {
-                       *errp = -EIO;
-                       goto err;
-               }
-               if (buffer_new(&dummy)) {
-                       J_ASSERT(create != 0);
-                       J_ASSERT(handle != NULL);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               J_ASSERT(create != 0);
+               J_ASSERT(handle != NULL);
  
-                       /*
-                        * Now that we do not always journal data, we should
-                        * keep in mind whether this should always journal the
-                        * new buffer as metadata.  For now, regular file
-                        * writes use ext4_get_block instead, so it's not a
-                        * problem.
-                        */
-                       lock_buffer(bh);
-                       BUFFER_TRACE(bh, "call get_create_access");
-                       fatal = ext4_journal_get_create_access(handle, bh);
-                       if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
-                               set_buffer_uptodate(bh);
-                       }
-                       unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-                       err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (!fatal)
-                               fatal = err;
-               } else {
-                       BUFFER_TRACE(bh, "not a new buffer");
-               }
-               if (fatal) {
-                       *errp = fatal;
-                       brelse(bh);
-                       bh = NULL;
+               /*
+                * Now that we do not always journal data, we should
+                * keep in mind whether this should always journal the
+                * new buffer as metadata.  For now, regular file
+                * writes use ext4_get_block instead, so it's not a
+                * problem.
+                */
+               lock_buffer(bh);
+               BUFFER_TRACE(bh, "call get_create_access");
+               fatal = ext4_journal_get_create_access(handle, bh);
+               if (!fatal && !buffer_uptodate(bh)) {
+                       memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+                       set_buffer_uptodate(bh);
                }
-               return bh;
+               unlock_buffer(bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
+               if (!fatal)
+                       fatal = err;
+       } else {
+               BUFFER_TRACE(bh, "not a new buffer");
        }
- err:
-       return NULL;
+       if (fatal) {
+               *errp = fatal;
+               brelse(bh);
+               bh = NULL;
+       }
+       return bh;
  }
  
  struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
@@@ -1860,7 -1841,7 +1841,7 @@@ static int ext4_da_reserve_space(struc
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed, md_reserved;
+       unsigned long md_needed;
        int ret;
  
        /*
         */
  repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_reserved = ei->i_reserved_meta_blocks;
        md_needed = ext4_calc_metadata_amount(inode, lblock);
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
  
        /*
-        * Make quota reservation here to prevent quota overflow
-        * later. Real quota accounting is done at pages writeout
-        * time.
+        * We will charge metadata quota at writeout time; this saves
+        * us from metadata over-estimation, though we may go over by
+        * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, md_needed + 1);
+       ret = dquot_reserve_block(inode, 1);
        if (ret)
                return ret;
+       /*
+        * We do still charge estimated metadata to the sb though;
+        * we cannot afford to run out of free blocks.
+        */
        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-               dquot_release_reservation_block(inode, md_needed + 1);
+               dquot_release_reservation_block(inode, 1);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@@ -1910,6 -1893,7 +1893,7 @@@ static void ext4_da_release_space(struc
  
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
  
+       trace_ext4_da_release_space(inode, to_free);
        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
                 * if there aren't enough reserved blocks, then the
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               to_free += ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
  
-       /* update fs dirty blocks counter */
+       /* update fs dirty data blocks counter */
        percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
  
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@@ -2042,28 -2027,23 +2027,23 @@@ static int mpage_da_submit_io(struct mp
  /*
   * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
   *
-  * @mpd->inode - inode to walk through
-  * @exbh->b_blocknr - first block on a disk
-  * @exbh->b_size - amount of space in bytes
-  * @logical - first logical block to start assignment with
-  *
   * the function goes through all passed space and put actual disk
   * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
   */
- static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
-                                struct buffer_head *exbh)
+ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
+                                struct ext4_map_blocks *map)
  {
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
-       int blocks = exbh->b_size >> inode->i_blkbits;
-       sector_t pblock = exbh->b_blocknr, cur_logical;
+       int blocks = map->m_len;
+       sector_t pblock = map->m_pblk, cur_logical;
        struct buffer_head *head, *bh;
        pgoff_t index, end;
        struct pagevec pvec;
        int nr_pages, i;
  
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
        cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  
        pagevec_init(&pvec, 0);
  
                        /* skip blocks out of the range */
                        do {
-                               if (cur_logical >= logical)
+                               if (cur_logical >= map->m_lblk)
                                        break;
                                cur_logical++;
                        } while ((bh = bh->b_this_page) != head);
  
                        do {
-                               if (cur_logical >= logical + blocks)
+                               if (cur_logical >= map->m_lblk + blocks)
                                        break;
  
-                               if (buffer_delay(bh) ||
-                                               buffer_unwritten(bh)) {
+                               if (buffer_delay(bh) || buffer_unwritten(bh)) {
  
                                        BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
  
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
  
-                               if (buffer_uninit(exbh))
+                               if (map->m_flags & EXT4_MAP_UNINIT)
                                        set_buffer_uninit(bh);
                                cur_logical++;
                                pblock++;
  }
  
  
- /*
-  * __unmap_underlying_blocks - just a helper function to unmap
-  * set of blocks described by @bh
-  */
- static inline void __unmap_underlying_blocks(struct inode *inode,
-                                            struct buffer_head *bh)
- {
-       struct block_device *bdev = inode->i_sb->s_bdev;
-       int blocks, i;
-       blocks = bh->b_size >> inode->i_blkbits;
-       for (i = 0; i < blocks; i++)
-               unmap_underlying_metadata(bdev, bh->b_blocknr + i);
- }
  static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
                                        sector_t logical, long blk_cnt)
  {
@@@ -2206,7 -2170,7 +2170,7 @@@ static void ext4_print_free_blocks(stru
  static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  {
        int err, blks, get_blocks_flags;
-       struct buffer_head new;
+       struct ext4_map_blocks map;
        sector_t next = mpd->b_blocknr;
        unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
        loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
         * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
         * variables are updated after the blocks have been allocated.
         */
-       new.b_state = 0;
+       map.m_lblk = next;
+       map.m_len = max_blocks;
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
        if (ext4_should_dioread_nolock(mpd->inode))
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (mpd->b_state & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
  
-       blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-                              &new, get_blocks_flags);
+       blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
                err = blks;
                /*
                ext4_msg(mpd->inode->i_sb, KERN_CRIT,
                         "delayed block allocation failed for inode %lu at "
                         "logical offset %llu with max blocks %zd with "
-                        "error %d\n", mpd->inode->i_ino,
+                        "error %d", mpd->inode->i_ino,
                         (unsigned long long) next,
                         mpd->b_size >> mpd->inode->i_blkbits, err);
                printk(KERN_CRIT "This should not happen!!  "
        }
        BUG_ON(blks == 0);
  
-       new.b_size = (blks << mpd->inode->i_blkbits);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               struct block_device *bdev = mpd->inode->i_sb->s_bdev;
+               int i;
  
-       if (buffer_new(&new))
-               __unmap_underlying_blocks(mpd->inode, &new);
+               for (i = 0; i < map.m_len; i++)
+                       unmap_underlying_metadata(bdev, map.m_pblk + i);
+       }
  
        /*
         * If blocks are delayed marked, we need to
         */
        if ((mpd->b_state & (1 << BH_Delay)) ||
            (mpd->b_state & (1 << BH_Unwritten)))
-               mpage_put_bnr_to_bhs(mpd, next, &new);
+               mpage_put_bnr_to_bhs(mpd, &map);
  
        if (ext4_should_order_data(mpd->inode)) {
                err = ext4_jbd2_file_inode(handle, mpd->inode);
@@@ -2349,8 -2316,17 +2316,17 @@@ static void mpage_add_bh_to_extent(stru
        sector_t next;
        int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
  
+       /*
+        * XXX Don't go larger than mballoc is willing to allocate
+        * This is a stopgap solution.  We eventually need to fold
+        * mpage_da_submit_io() into this function and then call
+        * ext4_get_blocks() multiple times in a loop
+        */
+       if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+               goto flush_it;
        /* check if the reserved journal credits might overflow */
-       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
                        /*
                         * With non-extent format we are limited by the journal
@@@ -2423,17 -2399,6 +2399,6 @@@ static int __mpage_da_writepage(struct 
        struct buffer_head *bh, *head;
        sector_t logical;
  
-       if (mpd->io_done) {
-               /*
-                * Rest of the page in the page_vec
-                * redirty then and skip then. We will
-                * try to write them again after
-                * starting a new transaction
-                */
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return MPAGE_DA_EXTENT_TAIL;
-       }
        /*
         * Can we merge this page to current extent?
         */
   * initialized properly.
   */
  static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
-                                 struct buffer_head *bh_result, int create)
+                                 struct buffer_head *bh, int create)
  {
+       struct ext4_map_blocks map;
        int ret = 0;
        sector_t invalid_block = ~((sector_t) 0xffff);
  
                invalid_block = ~0;
  
        BUG_ON(create == 0);
-       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+       BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+       map.m_lblk = iblock;
+       map.m_len = 1;
  
        /*
         * first, we need to know whether the block is allocated already
         * preallocated blocks are unmapped but should treated
         * the same as allocated blocks.
         */
-       ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
-       if ((ret == 0) && !buffer_delay(bh_result)) {
-               /* the block isn't (pre)allocated yet, let's reserve space */
+       ret = ext4_map_blocks(NULL, inode, &map, 0);
+       if (ret < 0)
+               return ret;
+       if (ret == 0) {
+               if (buffer_delay(bh))
+                       return 0; /* Not sure this could or should happen */
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
                        /* not enough space to reserve */
                        return ret;
  
-               map_bh(bh_result, inode->i_sb, invalid_block);
-               set_buffer_new(bh_result);
-               set_buffer_delay(bh_result);
-       } else if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               if (buffer_unwritten(bh_result)) {
-                       /* A delayed write to unwritten bh should
-                        * be marked new and mapped.  Mapped ensures
-                        * that we don't do get_block multiple times
-                        * when we write to the same offset and new
-                        * ensures that we do proper zero out for
-                        * partial write.
-                        */
-                       set_buffer_new(bh_result);
-                       set_buffer_mapped(bh_result);
-               }
-               ret = 0;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+               return 0;
        }
  
-       return ret;
+       map_bh(bh, inode->i_sb, map.m_pblk);
+       bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+       if (buffer_unwritten(bh)) {
+               /* A delayed write to unwritten bh should be marked
+                * new and mapped.  Mapped ensures that we don't do
+                * get_block multiple times when we write to the same
+                * offset and new ensures that we do proper zero out
+                * for partial write.
+                */
+               set_buffer_new(bh);
+               set_buffer_mapped(bh);
+       }
+       return 0;
  }
  
  /*
  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
  {
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-       /*
-        * we don't want to do block allocation in writepage
-        * so call get_block_wrap with create = 0
-        */
-       ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result, 0);
  }
  
  static int bget_one(handle_t *handle, struct buffer_head *bh)
@@@ -2821,13 -2780,131 +2780,131 @@@ static int ext4_da_writepages_trans_blo
         * number of contiguous block. So we will limit
         * number of contiguous block to a sane value
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
  
        return ext4_chunk_trans_blocks(inode, max_blocks);
  }
  
+ /*
+  * write_cache_pages_da - walk the list of dirty pages of the given
+  * address space and call the callback function (which usually writes
+  * the pages).
+  *
+  * This is a forked version of write_cache_pages().  Differences:
+  *    Range cyclic is ignored.
+  *    no_nrwrite_index_update is always presumed true
+  */
+ static int write_cache_pages_da(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               struct mpage_da_data *mpd)
+ {
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       long nr_to_write = wbc->nr_to_write;
+       pagevec_init(&pvec, 0);
+       index = wbc->range_start >> PAGE_CACHE_SHIFT;
+       end = wbc->range_end >> PAGE_CACHE_SHIFT;
+       while (!done && (index <= end)) {
+               int i;
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                             PAGECACHE_TAG_DIRTY,
+                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       /*
+                        * At this point, the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or
+                        * even swizzled back from swapper_space to tmpfs file
+                        * mapping. However, page->index will not change
+                        * because we have a reference on the page.
+                        */
+                       if (page->index > end) {
+                               done = 1;
+                               break;
+                       }
+                       lock_page(page);
+                       /*
+                        * Page truncated or invalidated. We can freely skip it
+                        * then, even for data integrity operations: the page
+                        * has disappeared concurrently, so there could be no
+                        * real expectation of this data integrity operation
+                        * even if there is now a new, dirty page at the same
+                        * pagecache address.
+                        */
+                       if (unlikely(page->mapping != mapping)) {
+ continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+                       if (PageWriteback(page)) {
+                               if (wbc->sync_mode != WB_SYNC_NONE)
+                                       wait_on_page_writeback(page);
+                               else
+                                       goto continue_unlock;
+                       }
+                       BUG_ON(PageWriteback(page));
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
+                       ret = __mpage_da_writepage(page, wbc, mpd);
+                       if (unlikely(ret)) {
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                               } else {
+                                       done = 1;
+                                       break;
+                               }
+                       }
+                       if (nr_to_write > 0) {
+                               nr_to_write--;
+                               if (nr_to_write == 0 &&
+                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                       /*
+                                        * We stop writing back only if we are
+                                        * not doing integrity sync. In case of
+                                        * integrity sync we have to keep going
+                                        * because someone may be concurrently
+                                        * dirtying pages, and we might have
+                                        * synced a lot of newly appeared dirty
+                                        * pages, but have not synced all of the
+                                        * old dirty pages.
+                                        */
+                                       done = 1;
+                                       break;
+                               }
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return ret;
+ }
  static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
  {
        handle_t *handle = NULL;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
-       int no_nrwrite_index_update;
        int pages_written = 0;
        long pages_skipped;
        unsigned int max_pages;
        mpd.wbc = wbc;
        mpd.inode = mapping->host;
  
-       /*
-        * we don't want write_cache_pages to update
-        * nr_to_write and writeback_index
-        */
-       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
-       wbc->no_nrwrite_index_update = 1;
        pages_skipped = wbc->pages_skipped;
  
  retry:
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
-                              "%ld pages, ino %lu; err %d\n", __func__,
+                              "%ld pages, ino %lu; err %d", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        goto out_writepages;
                }
                mpd.io_done = 0;
                mpd.pages_written = 0;
                mpd.retval = 0;
-               ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
-                                       &mpd);
+               ret = write_cache_pages_da(mapping, wbc, &mpd);
                /*
                 * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
        if (pages_skipped != wbc->pages_skipped)
                ext4_msg(inode->i_sb, KERN_CRIT,
                         "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d\n",
+                        "with nr_to_write = %ld ret = %d",
                         __func__, wbc->nr_to_write, ret);
  
        /* Update index */
                mapping->writeback_index = index;
  
  out_writepages:
-       if (!no_nrwrite_index_update)
-               wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
@@@ -3076,7 -3143,7 +3143,7 @@@ static int ext4_da_write_begin(struct f
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
  {
-       int ret, retries = 0, quota_retries = 0;
+       int ret, retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
@@@ -3135,22 -3202,6 +3202,6 @@@ retry
  
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
-       if ((ret == -EDQUOT) &&
-           EXT4_I(inode)->i_reserved_meta_blocks &&
-           (quota_retries++ < 3)) {
-               /*
-                * Since we often over-estimate the number of meta
-                * data blocks required, we may sometimes get a
-                * spurios out of quota error even though there would
-                * be enough space once we write the data blocks and
-                * find out how many meta data blocks were _really_
-                * required.  So try forcing the inode write to see if
-                * that helps.
-                */
-               write_inode_now(inode, (quota_retries == 3));
-               goto retry;
-       }
  out:
        return ret;
  }
        return ret;
  }
  
+ /*
+  * ext4_get_block used when preparing for a DIO write or buffer write.
+  * We allocate an uninitialized extent if blocks haven't been allocated.
+  * The extent will be converted to initialized after the IO is complete.
+  */
  static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
  {
-       handle_t *handle = ext4_journal_current_handle();
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-       int dio_credits;
-       int started = 0;
        ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       /*
-        * ext4_get_block in prepare for a DIO write or buffer write.
-        * We allocate an uinitialized extent if blocks haven't been allocated.
-        * The extent will be converted to initialized after IO complete.
-        */
-       create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
-       if (!handle) {
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-               handle = ext4_journal_start(inode, dio_credits);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
-               started = 1;
-       }
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       if (started)
-               ext4_journal_stop(handle);
- out:
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result,
+                              EXT4_GET_BLOCKS_IO_CREATE_EXT);
  }
  
  static void dump_completed_IO(struct inode * inode)
@@@ -3973,7 -3996,7 +3996,7 @@@ static ssize_t ext4_direct_IO(int rw, s
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
  
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
  
        return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
@@@ -4302,10 -4325,9 +4325,9 @@@ static int ext4_clear_blocks(handle_t *
  
        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
-               ext4_error(inode->i_sb, "inode #%lu: "
-                          "attempt to clear blocks %llu len %lu, invalid",
-                          inode->i_ino, (unsigned long long) block_to_free,
-                          count);
+               EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+                                "blocks %llu len %lu",
+                                (unsigned long long) block_to_free, count);
                return 1;
        }
  
@@@ -4410,11 -4432,10 +4432,10 @@@ static void ext4_free_data(handle_t *ha
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
-                       ext4_error(inode->i_sb,
-                                  "circular indirect block detected, "
-                                  "inode=%lu, block=%llu",
-                                  inode->i_ino,
-                                  (unsigned long long) this_bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                                        "circular indirect block detected at "
+                                        "block %llu",
+                               (unsigned long long) this_bh->b_blocknr);
        }
  }
  
@@@ -4452,11 -4473,10 +4473,10 @@@ static void ext4_free_branches(handle_
  
                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
-                               ext4_error(inode->i_sb,
-                                          "indirect mapped block in inode "
-                                          "#%lu invalid (level %d, blk #%lu)",
-                                          inode->i_ino, depth,
-                                          (unsigned long) nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "invalid indirect mapped "
+                                                "block %lu (level %d)",
+                                                (unsigned long) nr, depth);
                                break;
                        }
  
                         * (should be rare).
                         */
                        if (!bh) {
-                               ext4_error(inode->i_sb,
-                                          "Read failure, inode=%lu, block=%llu",
-                                          inode->i_ino, nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "Read failure block=%llu",
+                                                (unsigned long long) nr);
                                continue;
                        }
  
@@@ -4612,12 -4632,12 +4632,12 @@@ void ext4_truncate(struct inode *inode
        if (!ext4_can_truncate(inode))
                return;
  
-       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
  
        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
  
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
                return;
        }
@@@ -4785,8 -4805,8 +4805,8 @@@ static int __ext4_get_inode_loc(struct 
  
        bh = sb_getblk(sb, block);
        if (!bh) {
-               ext4_error(sb, "unable to read inode block - "
-                          "inode=%lu, block=%llu", inode->i_ino, block);
+               EXT4_ERROR_INODE(inode, "unable to read inode block - "
+                                "block %llu", block);
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@@ -4884,8 -4904,8 +4904,8 @@@ make_io
                submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       ext4_error(sb, "unable to read inode block - inode=%lu,"
-                                  " block=%llu", inode->i_ino, block);
+                       EXT4_ERROR_INODE(inode, "unable to read inode "
+                                        "block %llu", block);
                        brelse(bh);
                        return -EIO;
                }
@@@ -5096,8 -5116,8 +5116,8 @@@ struct inode *ext4_iget(struct super_bl
        ret = 0;
        if (ei->i_file_acl &&
            !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-               ext4_error(sb, "bad extended attribute block %llu inode #%lu",
-                          ei->i_file_acl, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+                                ei->i_file_acl);
                ret = -EIO;
                goto bad_inode;
        } else if (ei->i_flags & EXT4_EXTENTS_FL) {
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
                ret = -EIO;
-               ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
-                          inode->i_mode, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
        brelse(iloc.bh);
@@@ -5381,9 -5400,9 +5400,9 @@@ int ext4_write_inode(struct inode *inod
                if (wbc->sync_mode == WB_SYNC_ALL)
                        sync_dirty_buffer(iloc.bh);
                if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       ext4_error(inode->i_sb, "IO error syncing inode, "
-                                  "inode=%lu, block=%llu", inode->i_ino,
-                                  (unsigned long long)iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                               "IO error syncing inode (block=%llu)",
+                               (unsigned long long) iloc.bh->b_blocknr);
                        err = -EIO;
                }
                brelse(iloc.bh);
@@@ -5425,7 -5444,7 +5444,7 @@@ int ext4_setattr(struct dentry *dentry
        if (error)
                return error;
  
 -      if (ia_valid & ATTR_SIZE)
 +      if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
        if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
                (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
        }
  
        if (attr->ia_valid & ATTR_SIZE) {
-               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  
                        if (attr->ia_size > sbi->s_bitmap_maxbytes) {
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE &&
            (attr->ia_size < inode->i_size ||
-            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
+            (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
                handle_t *handle;
  
                handle = ext4_journal_start(inode, 3);
                        }
                }
                /* ext4_truncate will clear the flag */
-               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+               if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
                        ext4_truncate(inode);
        }
  
@@@ -5576,7 -5595,7 +5595,7 @@@ static int ext4_indirect_trans_blocks(s
  
  static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  {
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
  }
@@@ -5911,9 -5930,9 +5930,9 @@@ int ext4_change_inode_journal_flag(stru
         */
  
        if (val)
-               EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else
-               EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        ext4_set_aops(inode);
  
        jbd2_journal_unlock_updates(journal);
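
The inode.c hunks above convert open-coded flag tests such as EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL into calls to ext4_test_inode_flag(), ext4_set_inode_flag() and ext4_clear_inode_flag(). As a rough sketch only (the authoritative definitions live in fs/ext4/ext4.h), helpers of that shape can be built directly on the kernel bitops, assuming i_flags in struct ext4_inode_info is an unsigned long and the EXT4_INODE_* values are plain bit numbers:

#include <linux/bitops.h>

/*
 * Sketch, not the in-tree definitions: bit numbers assumed to mirror the
 * on-disk flag positions (EXT4_EXTENTS_FL = 1 << 19, EXT4_EOFBLOCKS_FL =
 * 1 << 22); EXT4_I() is ext4's accessor from struct inode to ext4_inode_info.
 */
enum {
	EXT4_INODE_EXTENTS   = 19,	/* inode uses extents */
	EXT4_INODE_EOFBLOCKS = 22,	/* blocks allocated beyond EOF */
};

static inline int ext4_test_inode_flag(struct inode *inode, int bit)
{
	return test_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_set_inode_flag(struct inode *inode, int bit)
{
	set_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
{
	clear_bit(bit, &EXT4_I(inode)->i_flags);
}

Atomic bitops let concurrent updaters flip individual flags without losing each other's changes, something an open-coded read-modify-write of i_flags cannot guarantee.
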
diff --combined fs/ext4/xattr.c
@@@ -97,7 -97,7 +97,7 @@@ static int ext4_xattr_list(struct dentr
  
  static struct mb_cache *ext4_xattr_cache;
  
 -static struct xattr_handler *ext4_xattr_handler_map[] = {
 +static const struct xattr_handler *ext4_xattr_handler_map[] = {
        [EXT4_XATTR_INDEX_USER]              = &ext4_xattr_user_handler,
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext4_xattr_acl_access_handler,
  #endif
  };
  
 -struct xattr_handler *ext4_xattr_handlers[] = {
 +const struct xattr_handler *ext4_xattr_handlers[] = {
        &ext4_xattr_user_handler,
        &ext4_xattr_trusted_handler,
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
        NULL
  };
  
 -static inline struct xattr_handler *
 +static inline const struct xattr_handler *
  ext4_xattr_handler(int name_index)
  {
 -      struct xattr_handler *handler = NULL;
 +      const struct xattr_handler *handler = NULL;
  
        if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
                handler = ext4_xattr_handler_map[name_index];
@@@ -228,9 -228,8 +228,8 @@@ ext4_xattr_block_get(struct inode *inod
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
  bad_block:
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@@ -332,7 -331,7 +331,7 @@@ ext4_xattr_list_entries(struct dentry *
        size_t rest = buffer_size;
  
        for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 -              struct xattr_handler *handler =
 +              const struct xattr_handler *handler =
                        ext4_xattr_handler(entry->e_name_index);
  
                if (handler) {
@@@ -372,9 -371,8 +371,8 @@@ ext4_xattr_block_list(struct dentry *de
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@@ -666,8 -664,8 +664,8 @@@ ext4_xattr_block_find(struct inode *ino
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
                if (ext4_xattr_check_block(bs->bh)) {
-                       ext4_error(sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@@ -820,7 -818,7 +818,7 @@@ inserted
                                                EXT4_I(inode)->i_block_group);
  
                        /* non-extent files can't have physical blocks past 2^32 */
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
  
                        block = ext4_new_meta_blocks(handle, inode,
                        if (error)
                                goto cleanup;
  
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
  
                        ea_idebug(inode, "creating block %d", block);
@@@ -880,8 -878,8 +878,8 @@@ cleanup_dquot
        goto cleanup;
  
  bad_block:
-       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+       EXT4_ERROR_INODE(inode, "bad block %llu",
+                        EXT4_I(inode)->i_file_acl);
        goto cleanup;
  
  #undef header
@@@ -1194,8 -1192,8 +1192,8 @@@ retry
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@@ -1372,14 -1370,14 +1370,14 @@@ ext4_xattr_delete_inode(handle_t *handl
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-               ext4_error(inode->i_sb, "inode %lu: block %llu read error",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "block %llu read error",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        ext4_xattr_release_block(handle, inode, bh);
@@@ -1504,9 -1502,8 +1502,8 @@@ again
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext4_error(inode->i_sb,
-                               "inode %lu: block %lu read error",
-                               inode->i_ino, (unsigned long) ce->e_block);
+                       EXT4_ERROR_INODE(inode, "block %lu read error",
+                                        (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
                                EXT4_XATTR_REFCOUNT_MAX) {
                        ea_idebug(inode, "block %lu refcount %d>=%d",
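
Both the inode.c and xattr.c hunks replace ext4_error(sb, "inode %lu: ...", inode->i_ino, ...) call sites with EXT4_ERROR_INODE(inode, ...), which folds the inode number into the message automatically. Purely as a hypothetical sketch built on the ext4_error() call form visible in the removed lines (the real macro in fs/ext4/ext4.h may differ, for instance by also recording the calling function):

/* Sketch only; not the in-tree definition. */
#define EXT4_ERROR_INODE(inode, fmt, a...)				\
	ext4_error((inode)->i_sb, "inode #%lu: " fmt,			\
		   (inode)->i_ino, ## a)

Centralizing the "inode #%lu" prefix keeps the individual error strings short and makes the reports uniform across call sites.
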
diff --combined fs/quota/dquot.c
@@@ -82,7 -82,7 +82,7 @@@
  
  /*
   * There are three quota SMP locks. dq_list_lock protects all lists with quotas
 - * and quota formats, dqstats structure containing statistics about the lists
 + * and quota formats.
   * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
   * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
   * i_blocks and i_bytes updates themselves are guarded by i_lock acquired directly
@@@ -132,9 -132,7 +132,9 @@@ static __cacheline_aligned_in_smp DEFIN
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
  EXPORT_SYMBOL(dq_data_lock);
  
 +#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
  static char *quotatypes[] = INITQFNAMES;
 +#endif
  static struct quota_format_type *quota_formats;       /* List of registered formats */
  static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
  
@@@ -228,10 -226,6 +228,10 @@@ static struct hlist_head *dquot_hash
  
  struct dqstats dqstats;
  EXPORT_SYMBOL(dqstats);
 +#ifdef CONFIG_SMP
 +struct dqstats *dqstats_pcpu;
 +EXPORT_SYMBOL(dqstats_pcpu);
 +#endif
  
  static qsize_t inode_get_rsv_space(struct inode *inode);
  static void __dquot_initialize(struct inode *inode, int type);
@@@ -279,7 -273,7 +279,7 @@@ static struct dquot *find_dquot(unsigne
  static inline void put_dquot_last(struct dquot *dquot)
  {
        list_add_tail(&dquot->dq_free, &free_dquots);
 -      dqstats.free_dquots++;
 +      dqstats_inc(DQST_FREE_DQUOTS);
  }
  
  static inline void remove_free_dquot(struct dquot *dquot)
        if (list_empty(&dquot->dq_free))
                return;
        list_del_init(&dquot->dq_free);
 -      dqstats.free_dquots--;
 +      dqstats_dec(DQST_FREE_DQUOTS);
  }
  
  static inline void put_inuse(struct dquot *dquot)
        /* We add to the back of inuse list so we don't have to restart
         * when traversing this list and we block */
        list_add_tail(&dquot->dq_inuse, &inuse_list);
 -      dqstats.allocated_dquots++;
 +      dqstats_inc(DQST_ALLOC_DQUOTS);
  }
  
  static inline void remove_inuse(struct dquot *dquot)
  {
 -      dqstats.allocated_dquots--;
 +      dqstats_dec(DQST_ALLOC_DQUOTS);
        list_del(&dquot->dq_inuse);
  }
  /*
@@@ -323,23 -317,14 +323,23 @@@ static inline int mark_dquot_dirty(stru
        return dquot->dq_sb->dq_op->mark_dirty(dquot);
  }
  
 +/* Mark dquot dirty in an atomic manner, and return its old dirty flag state */
  int dquot_mark_dquot_dirty(struct dquot *dquot)
  {
 +      int ret = 1;
 +
 +      /* If quota is dirty already, we don't have to acquire dq_list_lock */
 +      if (test_bit(DQ_MOD_B, &dquot->dq_flags))
 +              return 1;
 +
        spin_lock(&dq_list_lock);
 -      if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags))
 +      if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
                list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
                                info[dquot->dq_type].dqi_dirty_list);
 +              ret = 0;
 +      }
        spin_unlock(&dq_list_lock);
 -      return 0;
 +      return ret;
  }
  EXPORT_SYMBOL(dquot_mark_dquot_dirty);
  
@@@ -565,8 -550,8 +565,8 @@@ int dquot_scan_active(struct super_bloc
                        continue;
                /* Now we have active dquot so we can just increase use count */
                atomic_inc(&dquot->dq_count);
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_LOOKUPS);
                dqput(old_dquot);
                old_dquot = dquot;
                ret = fn(dquot, priv);
@@@ -611,8 -596,8 +611,8 @@@ int vfs_quota_sync(struct super_block *
                         * holding reference so we can safely just increase
                         * use count */
                        atomic_inc(&dquot->dq_count);
 -                      dqstats.lookups++;
                        spin_unlock(&dq_list_lock);
 +                      dqstats_inc(DQST_LOOKUPS);
                        sb->dq_op->write_dquot(dquot);
                        dqput(dquot);
                        spin_lock(&dq_list_lock);
                if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
                    && info_dirty(&dqopt->info[cnt]))
                        sb->dq_op->write_info(sb, cnt);
 -      spin_lock(&dq_list_lock);
 -      dqstats.syncs++;
 -      spin_unlock(&dq_list_lock);
 +      dqstats_inc(DQST_SYNCS);
        mutex_unlock(&dqopt->dqonoff_mutex);
  
        if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@@ -676,22 -663,6 +676,22 @@@ static void prune_dqcache(int count
        }
  }
  
 +static int dqstats_read(unsigned int type)
 +{
 +      int count = 0;
 +#ifdef CONFIG_SMP
 +      int cpu;
 +      for_each_possible_cpu(cpu)
 +              count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
 +      /* Statistics reading is racy, but absolute accuracy isn't required */
 +      if (count < 0)
 +              count = 0;
 +#else
 +      count = dqstats.stat[type];
 +#endif
 +      return count;
 +}
 +
  /*
   * This is called from kswapd when we think we need some
   * more memory
@@@ -704,7 -675,7 +704,7 @@@ static int shrink_dqcache_memory(int nr
                prune_dqcache(nr);
                spin_unlock(&dq_list_lock);
        }
 -      return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
 +      return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
  }
  
  static struct shrinker dqcache_shrinker = {
@@@ -732,7 -703,10 +732,7 @@@ void dqput(struct dquot *dquot
                BUG();
        }
  #endif
 -      
 -      spin_lock(&dq_list_lock);
 -      dqstats.drops++;
 -      spin_unlock(&dq_list_lock);
 +      dqstats_inc(DQST_DROPS);
  we_slept:
        spin_lock(&dq_list_lock);
        if (atomic_read(&dquot->dq_count) > 1) {
@@@ -849,15 -823,15 +849,15 @@@ we_slept
                put_inuse(dquot);
                /* hash it first so it can be found */
                insert_dquot_hash(dquot);
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_LOOKUPS);
        } else {
                if (!atomic_read(&dquot->dq_count))
                        remove_free_dquot(dquot);
                atomic_inc(&dquot->dq_count);
 -              dqstats.cache_hits++;
 -              dqstats.lookups++;
                spin_unlock(&dq_list_lock);
 +              dqstats_inc(DQST_CACHE_HITS);
 +              dqstats_inc(DQST_LOOKUPS);
        }
        /* Wait for dq_lock - after this we know that either dquot_release() is
         * already finished or it will be canceled due to dq_count > 1 test */
@@@ -1514,11 -1488,13 +1514,13 @@@ static void inode_decr_space(struct ino
  /*
   * This operation can block, but only after everything is updated
   */
- int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
  {
        int cnt, ret = 0;
        char warntype[MAXQUOTAS];
+       int warn = flags & DQUOT_SPACE_WARN;
+       int reserve = flags & DQUOT_SPACE_RESERVE;
+       int nofail = flags & DQUOT_SPACE_NOFAIL;
  
        /*
         * First test before acquiring mutex - solves deadlocks when we
                        continue;
                ret = check_bdq(inode->i_dquot[cnt], number, !warn,
                                warntype+cnt);
-               if (ret) {
+               if (ret && !nofail) {
                        spin_unlock(&dq_data_lock);
                        goto out_flush_warn;
                }
@@@ -1638,10 -1614,11 +1640,11 @@@ EXPORT_SYMBOL(dquot_claim_space_nodirty
  /*
   * This operation can block, but only after everything is updated
   */
- void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
  {
        unsigned int cnt;
        char warntype[MAXQUOTAS];
+       int reserve = flags & DQUOT_SPACE_RESERVE;
  
        /* First test before acquiring mutex - solves deadlocks when we
           * re-enter the quota code and are already holding the mutex */
@@@ -1703,19 -1680,16 +1706,19 @@@ EXPORT_SYMBOL(dquot_free_inode)
  
  /*
   * Transfer the number of inodes and blocks from one diskquota to another.
 + * On success, dquot references in transfer_to are consumed and references
 + * to original dquots that need to be released are placed there. On failure,
 + * references are kept untouched.
   *
   * This operation can block, but only after everything is updated
   * A transaction must be started when entering this function.
 + *
   */
 -static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
  {
        qsize_t space, cur_space;
        qsize_t rsv_space = 0;
 -      struct dquot *transfer_from[MAXQUOTAS];
 -      struct dquot *transfer_to[MAXQUOTAS];
 +      struct dquot *transfer_from[MAXQUOTAS] = {};
        int cnt, ret = 0;
        char warntype_to[MAXQUOTAS];
        char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
        if (IS_NOQUOTA(inode))
                return 0;
        /* Initialize the arrays */
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 -              transfer_from[cnt] = NULL;
 -              transfer_to[cnt] = NULL;
 +      for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                warntype_to[cnt] = QUOTA_NL_NOWARN;
 -      }
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 -              if (mask & (1 << cnt))
 -                      transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
 -      }
        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
        if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
                up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 -              goto put_all;
 +              return 0;
        }
        spin_lock(&dq_data_lock);
        cur_space = inode_get_bytes(inode);
  
        mark_all_dquot_dirty(transfer_from);
        mark_all_dquot_dirty(transfer_to);
 -      /* The reference we got is transferred to the inode */
 +      /* Pass back references to put */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 -              transfer_to[cnt] = NULL;
 -warn_put_all:
 +              transfer_to[cnt] = transfer_from[cnt];
 +warn:
        flush_warnings(transfer_to, warntype_to);
        flush_warnings(transfer_from, warntype_from_inodes);
        flush_warnings(transfer_from, warntype_from_space);
 -put_all:
 -      dqput_all(transfer_from);
 -      dqput_all(transfer_to);
        return ret;
  over_quota:
        spin_unlock(&dq_data_lock);
        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 -      /* Clear dquot pointers we don't want to dqput() */
 -      for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 -              transfer_from[cnt] = NULL;
 -      goto warn_put_all;
 +      goto warn;
  }
 +EXPORT_SYMBOL(__dquot_transfer);
  
  /* Wrapper for transferring ownership of an inode for uid/gid only
   * Called from FSXXX_setattr()
   */
  int dquot_transfer(struct inode *inode, struct iattr *iattr)
  {
 -      qid_t chid[MAXQUOTAS];
 -      unsigned long mask = 0;
 +      struct dquot *transfer_to[MAXQUOTAS] = {};
 +      struct super_block *sb = inode->i_sb;
 +      int ret;
  
 -      if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
 -              mask |= 1 << USRQUOTA;
 -              chid[USRQUOTA] = iattr->ia_uid;
 -      }
 -      if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
 -              mask |= 1 << GRPQUOTA;
 -              chid[GRPQUOTA] = iattr->ia_gid;
 -      }
 -      if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 -              dquot_initialize(inode);
 -              return __dquot_transfer(inode, chid, mask);
 -      }
 -      return 0;
 +      if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
 +              return 0;
 +
 +      if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
 +              transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
 +      if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
 +              transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
 +
 +      ret = __dquot_transfer(inode, transfer_to);
 +      dqput_all(transfer_to);
 +      return ret;
  }
  EXPORT_SYMBOL(dquot_transfer);
  
@@@ -2291,30 -2278,25 +2294,30 @@@ static inline qsize_t stoqb(qsize_t spa
  }
  
  /* Generic routine for getting common part of quota structure */
 -static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 +static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
  {
        struct mem_dqblk *dm = &dquot->dq_dqb;
  
 +      memset(di, 0, sizeof(*di));
 +      di->d_version = FS_DQUOT_VERSION;
 +      di->d_flags = dquot->dq_type == USRQUOTA ?
 +                      XFS_USER_QUOTA : XFS_GROUP_QUOTA;
 +      di->d_id = dquot->dq_id;
 +
        spin_lock(&dq_data_lock);
 -      di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
 -      di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
 -      di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
 -      di->dqb_ihardlimit = dm->dqb_ihardlimit;
 -      di->dqb_isoftlimit = dm->dqb_isoftlimit;
 -      di->dqb_curinodes = dm->dqb_curinodes;
 -      di->dqb_btime = dm->dqb_btime;
 -      di->dqb_itime = dm->dqb_itime;
 -      di->dqb_valid = QIF_ALL;
 +      di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
 +      di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
 +      di->d_ino_hardlimit = dm->dqb_ihardlimit;
 +      di->d_ino_softlimit = dm->dqb_isoftlimit;
 +      di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
 +      di->d_icount = dm->dqb_curinodes;
 +      di->d_btimer = dm->dqb_btime;
 +      di->d_itimer = dm->dqb_itime;
        spin_unlock(&dq_data_lock);
  }
  
  int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 -                struct if_dqblk *di)
 +                struct fs_disk_quota *di)
  {
        struct dquot *dquot;
  
  }
  EXPORT_SYMBOL(vfs_get_dqblk);
  
 +#define VFS_FS_DQ_MASK \
 +      (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
 +       FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
 +       FS_DQ_BTIMER | FS_DQ_ITIMER)
 +
  /* Generic routine for setting common part of quota structure */
 -static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 +static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
  {
        struct mem_dqblk *dm = &dquot->dq_dqb;
        int check_blim = 0, check_ilim = 0;
        struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
  
 -      if ((di->dqb_valid & QIF_BLIMITS &&
 -           (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
 -            di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
 -          (di->dqb_valid & QIF_ILIMITS &&
 -           (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
 -            di->dqb_isoftlimit > dqi->dqi_maxilimit)))
 +      if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
 +              return -EINVAL;
 +
 +      if (((di->d_fieldmask & FS_DQ_BSOFT) &&
 +           (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
 +          ((di->d_fieldmask & FS_DQ_BHARD) &&
 +           (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
 +          ((di->d_fieldmask & FS_DQ_ISOFT) &&
 +           (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
 +          ((di->d_fieldmask & FS_DQ_IHARD) &&
 +           (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
                return -ERANGE;
  
        spin_lock(&dq_data_lock);
 -      if (di->dqb_valid & QIF_SPACE) {
 -              dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
 +      if (di->d_fieldmask & FS_DQ_BCOUNT) {
 +              dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_BLIMITS) {
 -              dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 -              dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 +
 +      if (di->d_fieldmask & FS_DQ_BSOFT)
 +              dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
 +      if (di->d_fieldmask & FS_DQ_BHARD)
 +              dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
 +      if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_INODES) {
 -              dm->dqb_curinodes = di->dqb_curinodes;
 +
 +      if (di->d_fieldmask & FS_DQ_ICOUNT) {
 +              dm->dqb_curinodes = di->d_icount;
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_ILIMITS) {
 -              dm->dqb_isoftlimit = di->dqb_isoftlimit;
 -              dm->dqb_ihardlimit = di->dqb_ihardlimit;
 +
 +      if (di->d_fieldmask & FS_DQ_ISOFT)
 +              dm->dqb_isoftlimit = di->d_ino_softlimit;
 +      if (di->d_fieldmask & FS_DQ_IHARD)
 +              dm->dqb_ihardlimit = di->d_ino_hardlimit;
 +      if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_BTIME) {
 -              dm->dqb_btime = di->dqb_btime;
 +
 +      if (di->d_fieldmask & FS_DQ_BTIMER) {
 +              dm->dqb_btime = di->d_btimer;
                check_blim = 1;
                set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
        }
 -      if (di->dqb_valid & QIF_ITIME) {
 -              dm->dqb_itime = di->dqb_itime;
 +
 +      if (di->d_fieldmask & FS_DQ_ITIMER) {
 +              dm->dqb_itime = di->d_itimer;
                check_ilim = 1;
                set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
        }
                    dm->dqb_curspace < dm->dqb_bsoftlimit) {
                        dm->dqb_btime = 0;
                        clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 -              } else if (!(di->dqb_valid & QIF_BTIME))
 +              } else if (!(di->d_fieldmask & FS_DQ_BTIMER))
                        /* Set grace only if user hasn't provided his own... */
                        dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
        }
                    dm->dqb_curinodes < dm->dqb_isoftlimit) {
                        dm->dqb_itime = 0;
                        clear_bit(DQ_INODES_B, &dquot->dq_flags);
 -              } else if (!(di->dqb_valid & QIF_ITIME))
 +              } else if (!(di->d_fieldmask & FS_DQ_ITIMER))
                        /* Set grace only if user hasn't provided his own... */
                        dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
        }
  }
  
  int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
 -                struct if_dqblk *di)
 +                struct fs_disk_quota *di)
  {
        struct dquot *dquot;
        int rc;
@@@ -2505,74 -2468,62 +2508,74 @@@ const struct quotactl_ops vfs_quotactl_
        .set_dqblk      = vfs_set_dqblk
  };
  
 +
 +static int do_proc_dqstats(struct ctl_table *table, int write,
 +                   void __user *buffer, size_t *lenp, loff_t *ppos)
 +{
 +#ifdef CONFIG_SMP
 +      /* Update global table */
 +      unsigned int type = (int *)table->data - dqstats.stat;
 +      dqstats.stat[type] = dqstats_read(type);
 +#endif
 +      return proc_dointvec(table, write, buffer, lenp, ppos);
 +}
 +
  static ctl_table fs_dqstats_table[] = {
        {
                .procname       = "lookups",
 -              .data           = &dqstats.lookups,
 +              .data           = &dqstats.stat[DQST_LOOKUPS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "drops",
 -              .data           = &dqstats.drops,
 +              .data           = &dqstats.stat[DQST_DROPS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "reads",
 -              .data           = &dqstats.reads,
 +              .data           = &dqstats.stat[DQST_READS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "writes",
 -              .data           = &dqstats.writes,
 +              .data           = &dqstats.stat[DQST_WRITES],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "cache_hits",
 -              .data           = &dqstats.cache_hits,
 +              .data           = &dqstats.stat[DQST_CACHE_HITS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "allocated_dquots",
 -              .data           = &dqstats.allocated_dquots,
 +              .data           = &dqstats.stat[DQST_ALLOC_DQUOTS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "free_dquots",
 -              .data           = &dqstats.free_dquots,
 +              .data           = &dqstats.stat[DQST_FREE_DQUOTS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
        {
                .procname       = "syncs",
 -              .data           = &dqstats.syncs,
 +              .data           = &dqstats.stat[DQST_SYNCS],
                .maxlen         = sizeof(int),
                .mode           = 0444,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = do_proc_dqstats,
        },
  #ifdef CONFIG_PRINT_QUOTA_WARNING
        {
@@@ -2624,13 -2575,6 +2627,13 @@@ static int __init dquot_init(void
        if (!dquot_hash)
                panic("Cannot create dquot hash table");
  
 +#ifdef CONFIG_SMP
 +      dqstats_pcpu = alloc_percpu(struct dqstats);
 +      if (!dqstats_pcpu)
 +              panic("Cannot create dquot stats table");
 +#endif
 +      memset(&dqstats, 0, sizeof(struct dqstats));
 +
        /* Find power-of-two hlist_heads which can fit into allocation */
        nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
        dq_hash_bits = 0;
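
The dquot.c changes above stop bumping the statistics counters under dq_list_lock (dqstats.lookups++, dqstats.drops++, ...) and use dqstats_inc()/dqstats_dec() instead, with dqstats_read() summing the per-CPU copies allocated in dquot_init(). A rough sketch of SMP helpers consistent with that read side, assuming struct dqstats carries the stat[] array used above (the actual helpers belong in include/linux/quota.h and may be implemented differently):

#include <linux/percpu.h>

#ifdef CONFIG_SMP
/* Sketch: lockless per-CPU bump; dqstats_read() tolerates racy totals. */
static inline void dqstats_inc(unsigned int type)
{
	per_cpu_ptr(dqstats_pcpu, get_cpu())->stat[type]++;
	put_cpu();
}

static inline void dqstats_dec(unsigned int type)
{
	per_cpu_ptr(dqstats_pcpu, get_cpu())->stat[type]--;
	put_cpu();
}
#else
static inline void dqstats_inc(unsigned int type)
{
	dqstats.stat[type]++;
}

static inline void dqstats_dec(unsigned int type)
{
	dqstats.stat[type]--;
}
#endif

Dropping dq_list_lock from these hot paths is what allows call sites such as dqget() and dqput() to move the counter updates outside the spinlock, as the hunks above do.
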
diff --combined include/linux/quotaops.h
@@@ -9,19 -9,15 +9,23 @@@
  
  #include <linux/fs.h>
  
+ #define DQUOT_SPACE_WARN      0x1
+ #define DQUOT_SPACE_RESERVE   0x2
+ #define DQUOT_SPACE_NOFAIL    0x4
  static inline struct quota_info *sb_dqopt(struct super_block *sb)
  {
        return &sb->s_dquot;
  }
  
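 +/* i_mutex must be held */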
 +static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 +{
 +      return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
 +              (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
 +              (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
 +}
 +
  #if defined(CONFIG_QUOTA)
  
  /*
@@@ -41,9 -37,8 +45,8 @@@ int dquot_scan_active(struct super_bloc
  struct dquot *dquot_alloc(struct super_block *sb, int type);
  void dquot_destroy(struct dquot *dquot);
  
- int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve);
- void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
+ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
+ void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
  
  int dquot_alloc_inode(const struct inode *inode);
  
@@@ -71,12 -66,9 +74,12 @@@ int vfs_quota_disable(struct super_bloc
  int vfs_quota_sync(struct super_block *sb, int type, int wait);
  int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
  int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 +              struct fs_disk_quota *di);
 +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
 +              struct fs_disk_quota *di);
  
 +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
  int dquot_transfer(struct inode *inode, struct iattr *iattr);
  int vfs_dq_quota_on_remount(struct super_block *sb);
  
@@@ -242,17 -234,17 +245,17 @@@ static inline int dquot_transfer(struc
  }
  
  static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+               int flags)
  {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_add_bytes(inode, number);
        return 0;
  }
  
  static inline void __dquot_free_space(struct inode *inode, qsize_t number,
-               int reserve)
+               int flags)
  {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_sub_bytes(inode, number);
  }
  
@@@ -268,7 -260,13 +271,13 @@@ static inline int dquot_claim_space_nod
  
  static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr, 1, 0);
+       return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN);
+ }
+ static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
+ {
+       __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
+       mark_inode_dirty(inode);
  }
  
  static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@@ -286,6 -284,11 +295,11 @@@ static inline int dquot_alloc_block_nod
        return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
  }
  
+ static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr)
+ {
+       dquot_alloc_space_nofail(inode, nr << inode->i_blkbits);
+ }
  static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
  {
        return dquot_alloc_space(inode, nr << inode->i_blkbits);
  
  static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0);
  }
  
  static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
  
  static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
  {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits,
+                               DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
  }
  
  static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
@@@ -345,7 -349,7 +360,7 @@@ static inline void dquot_free_block(str
  static inline void dquot_release_reservation_block(struct inode *inode,
                qsize_t nr)
  {
-       __dquot_free_space(inode, nr << inode->i_blkbits, 1);
+       __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE);
  }
  
  #endif /* _LINUX_QUOTAOPS_ */
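
With the warn/reserve parameters folded into a single flags argument, filesystems are expected to go through the static inline wrappers above and only spell out DQUOT_SPACE_* bits for unusual combinations. A short usage sketch (the surrounding function is illustrative, not taken from any particular filesystem):

#include <linux/quotaops.h>

/* Illustrative delayed-allocation style flow built on the wrappers above. */
static int example_charge_blocks(struct inode *inode, qsize_t nr_blocks)
{
	int ret;

	/*
	 * Reserve quota for nr_blocks without touching i_blocks yet; this
	 * expands to __dquot_alloc_space(inode, bytes,
	 * DQUOT_SPACE_WARN | DQUOT_SPACE_RESERVE).
	 */
	ret = dquot_reserve_block(inode, nr_blocks);
	if (ret)
		return ret;

	/* ... allocate the blocks on disk ... */

	/* Convert the in-memory reservation into a real allocation. */
	return dquot_claim_block(inode, nr_blocks);
}

Callers that must not fail can use dquot_alloc_block_nofail(), which adds DQUOT_SPACE_NOFAIL so the over-quota check in __dquot_alloc_space() no longer aborts the allocation.
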