Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5653fa4..5cb9a21 100644
@@ -657,6 +657,18 @@ has_zeroout:
        return retval;
 }
 
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+       struct inode *inode = bh->b_assoc_map->host;
+       /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+       int err;
+       if (!uptodate)
+               return;
+       WARN_ON(!buffer_unwritten(bh));
+       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
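The new end_io helper recovers the byte offset of the write by shifting the logical block number that _ext4_get_block() stashes in bh->b_private (see the next hunk). A minimal userspace sketch of that round trip, with an assumed 4 KiB block size (i_blkbits == 12); it is illustrative only and not part of the patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int blkbits = 12;              /* assumed: 4 KiB blocks */
	uint64_t iblock = 300000;               /* logical block of the write */

	/* _ext4_get_block() stores the block number in the pointer field... */
	void *b_private = (void *)(unsigned long)iblock;

	/* ...and ext4_end_io_unwritten() turns it back into a byte offset.
	 * On 32-bit, the unsigned long cast truncates large block numbers,
	 * which is what the XXX comment in the patch is pointing at. */
	long long offset = (long long)(uintptr_t)b_private << blkbits;

	printf("block %llu -> offset %lld\n",
	       (unsigned long long)iblock, offset);
	return 0;
}
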
@@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+                       bh->b_assoc_map = inode->i_mapping;
+                       bh->b_private = (void *)(unsigned long)iblock;
+                       bh->b_end_io = ext4_end_io_unwritten;
+               }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -1007,6 +1024,7 @@ static int ext4_write_end(struct file *file,
 {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int i_size_changed = 0;
 
@@ -1037,6 +1055,8 @@ static int ext4_write_end(struct file *file,
        unlock_page(page);
        page_cache_release(page);
 
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        /*
         * Don't mark the inode dirty under page lock. First, it unnecessarily
         * makes the holding time of page lock longer. Second, it forces lock
@@ -1078,6 +1098,7 @@ static int ext4_journalled_write_end(struct file *file,
 {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int partial = 0;
        unsigned from, to;
@@ -1110,6 +1131,9 @@ static int ext4_journalled_write_end(struct file *file,
        unlock_page(page);
        page_cache_release(page);
 
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
+
        if (size_changed) {
                ret2 = ext4_mark_inode_dirty(handle, inode);
                if (!ret)
@@ -3010,13 +3034,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
-       ret = __blockdev_direct_IO(rw, iocb, inode,
-                                  inode->i_sb->s_bdev, iter,
-                                  offset,
-                                  get_block_func,
-                                  ext4_end_io_dio,
-                                  NULL,
-                                  dio_flags);
+       if (IS_DAX(inode))
+               ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
+                               ext4_end_io_dio, dio_flags);
+       else
+               ret = __blockdev_direct_IO(rw, iocb, inode,
+                                          inode->i_sb->s_bdev, iter, offset,
+                                          get_block_func,
+                                          ext4_end_io_dio, NULL, dio_flags);
 
        /*
         * Put our reference to io_end. This can free the io_end structure e.g.
@@ -3180,19 +3205,12 @@ void ext4_set_aops(struct inode *inode)
                inode->i_mapping->a_ops = &ext4_aops;
 }
 
-/*
- * ext4_block_zero_page_range() zeros out a mapping of length 'length'
- * starting from file offset 'from'.  The range to be zero'd must
- * be contained with in one block.  If the specified range exceeds
- * the end of the block it will be shortened to end of the block
- * that cooresponds to 'from'
- */
-static int ext4_block_zero_page_range(handle_t *handle,
+static int __ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length)
 {
        ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned blocksize, max, pos;
+       unsigned blocksize, pos;
        ext4_lblk_t iblock;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
@@ -3205,14 +3223,6 @@ static int ext4_block_zero_page_range(handle_t *handle,
                return -ENOMEM;
 
        blocksize = inode->i_sb->s_blocksize;
-       max = blocksize - (offset & (blocksize - 1));
-
-       /*
-        * correct length if it does not fall between
-        * 'from' and the end of the block
-        */
-       if (length > max || length < 0)
-               length = max;
 
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
@@ -3277,6 +3287,33 @@ unlock:
        return err;
 }
 
+/*
+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
+ * starting from file offset 'from'.  The range to be zeroed must
+ * be contained within one block.  If the specified range exceeds
+ * the end of the block it will be shortened to the end of the block
+ * that corresponds to 'from'.
+ */
+static int ext4_block_zero_page_range(handle_t *handle,
+               struct address_space *mapping, loff_t from, loff_t length)
+{
+       struct inode *inode = mapping->host;
+       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned blocksize = inode->i_sb->s_blocksize;
+       unsigned max = blocksize - (offset & (blocksize - 1));
+
+       /*
+        * correct length if it does not fall between
+        * 'from' and the end of the block
+        */
+       if (length > max || length < 0)
+               length = max;
+
+       if (IS_DAX(inode))
+               return dax_zero_page_range(inode, from, length, ext4_get_block);
+       return __ext4_block_zero_page_range(handle, mapping, from, length);
+}
+
 /*
  * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
  * up to the end of the block which corresponds to `from'.
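For reference, the clamp that the relocated ext4_block_zero_page_range() comment describes reduces to a few lines of arithmetic. A standalone sketch assuming the block size equals the page size (4 KiB); this is not the kernel code itself:

#include <stdio.h>

/* Shorten 'length' so the zeroed range stays inside the block that
 * contains 'from', mirroring ext4_block_zero_page_range()'s clamp. */
static long long clamp_to_block(unsigned long long from, long long length,
				unsigned blocksize)
{
	unsigned offset = from & (blocksize - 1);   /* offset within the block */
	unsigned max = blocksize - offset;          /* bytes left in the block */

	if (length > max || length < 0)
		length = max;
	return length;
}

int main(void)
{
	/* Zeroing 200 bytes at offset 4000 of a 4096-byte block is cut
	 * down to the 96 bytes that remain before the block boundary. */
	printf("%lld\n", clamp_to_block(4000, 200, 4096));  /* prints 96 */
	return 0;
}
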
@@ -3798,8 +3835,10 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
+       if (test_opt(inode->i_sb, DAX))
+               new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
-                       S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+                       S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
 }
 
 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4052,7 +4091,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
-               inode->i_fop = &ext4_file_operations;
+               if (test_opt(inode->i_sb, DAX))
+                       inode->i_fop = &ext4_dax_file_operations;
+               else
+                       inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext4_dir_inode_operations;
@@ -4139,6 +4181,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
        return 0;
 }
 
+struct other_inode {
+       unsigned long           orig_ino;
+       struct ext4_inode       *raw_inode;
+};
+
+static int other_inode_match(struct inode * inode, unsigned long ino,
+                            void *data)
+{
+       struct other_inode *oi = (struct other_inode *) data;
+
+       if ((inode->i_ino != ino) ||
+           (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+                              I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+           ((inode->i_state & I_DIRTY_TIME) == 0))
+               return 0;
+       spin_lock(&inode->i_lock);
+       if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+                               I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
+           (inode->i_state & I_DIRTY_TIME)) {
+               struct ext4_inode_info  *ei = EXT4_I(inode);
+
+               inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+               spin_unlock(&inode->i_lock);
+
+               spin_lock(&ei->i_raw_lock);
+               EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
+               EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
+               EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
+               ext4_inode_csum_set(inode, oi->raw_inode, ei);
+               spin_unlock(&ei->i_raw_lock);
+               trace_ext4_other_inode_update_time(inode, oi->orig_ino);
+               return -1;
+       }
+       spin_unlock(&inode->i_lock);
+       return -1;
+}
+
+/*
+ * Opportunistically update the time fields for other inodes in the
+ * same inode table block.
+ */
+static void ext4_update_other_inodes_time(struct super_block *sb,
+                                         unsigned long orig_ino, char *buf)
+{
+       struct other_inode oi;
+       unsigned long ino;
+       int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+       int inode_size = EXT4_INODE_SIZE(sb);
+
+       oi.orig_ino = orig_ino;
+       ino = orig_ino & ~(inodes_per_block - 1);
+       for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
+               if (ino == orig_ino)
+                       continue;
+               oi.raw_inode = (struct ext4_inode *) buf;
+               (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
+       }
+}
+
 /*
  * Post the struct inode info into an on-disk inode location in the
  * buffer-cache.  This gobbles the caller's reference to the
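The walk over the inode table block above is plain index arithmetic: mask the inode number down to the first slot of its block, then step the raw buffer by the on-disk inode size. A hedged userspace sketch with made-up geometry (16 inodes per block, 256-byte inodes), purely to illustrate the loop in ext4_update_other_inodes_time():

#include <stdio.h>

int main(void)
{
	/* Assumed geometry for illustration only. */
	int inodes_per_block = 16;
	int inode_size = 256;
	unsigned long orig_ino = 37;

	/* First inode slot covered by the same inode table block. */
	unsigned long ino = orig_ino & ~(unsigned long)(inodes_per_block - 1);

	for (int i = 0; i < inodes_per_block; i++, ino++) {
		if (ino == orig_ino)
			continue;	/* caller already updated this inode */
		printf("candidate inode %lu at buffer offset %d\n",
		       ino, i * inode_size);
	}
	return 0;
}
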
@@ -4248,10 +4349,11 @@ static int ext4_do_update_inode(handle_t *handle,
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
-
        ext4_inode_csum_set(inode, raw_inode, ei);
-
        spin_unlock(&ei->i_raw_lock);
+       if (inode->i_sb->s_flags & MS_LAZYTIME)
+               ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
+                                             bh->b_data);
 
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -4534,7 +4636,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
                 */
-                       truncate_pagecache(inode, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
        }
        /*
         * We want to call ext4_truncate() even if attr->ia_size ==
@@ -4840,11 +4942,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * If the inode is marked synchronous, we don't honour that here - doing
  * so would cause a commit on atime updates, which we don't bother doing.
  * We handle synchronous inodes at the highest possible level.
+ *
+ * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
+ * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
+ * to copy into the on-disk inode structure are the timestamp fields.
  */
 void ext4_dirty_inode(struct inode *inode, int flags)
 {
        handle_t *handle;
 
+       if (flags == I_DIRTY_TIME)
+               return;
        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
        if (IS_ERR(handle))
                goto out;
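
The effect of the new early return is simply that a timestamp-only dirtying never starts a journal transaction. A toy illustration of that decision, using made-up flag values rather than the kernel's definitions:

#include <stdio.h>

#define I_DIRTY_SYNC	0x1	/* illustrative values, not the kernel's */
#define I_DIRTY_TIME	0x2

/* Mirrors the check at the top of ext4_dirty_inode(): a pure
 * I_DIRTY_TIME update is deferred and costs no transaction. */
static int starts_transaction(int flags)
{
	return flags != I_DIRTY_TIME;
}

int main(void)
{
	printf("%d\n", starts_transaction(I_DIRTY_TIME));			/* 0 */
	printf("%d\n", starts_transaction(I_DIRTY_TIME | I_DIRTY_SYNC));	/* 1 */
	return 0;
}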