mm: silently skip readahead for DAX inodes
[cascardo/linux.git] / fs / fs-writeback.c
index fe7e83a..05713a5 100644 (file)
@@ -980,6 +980,42 @@ void inode_io_list_del(struct inode *inode)
        spin_unlock(&wb->list_lock);
 }
 
+/*
+ * mark an inode as under writeback on the sb
+ */
+void sb_mark_inode_writeback(struct inode *inode)
+{
+       struct super_block *sb = inode->i_sb;
+       unsigned long flags;
+
+       if (list_empty(&inode->i_wb_list)) {
+               spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
+               if (list_empty(&inode->i_wb_list)) {
+                       list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
+                       trace_sb_mark_inode_writeback(inode);
+               }
+               spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
+       }
+}
+
+/*
+ * clear an inode as under writeback on the sb
+ */
+void sb_clear_inode_writeback(struct inode *inode)
+{
+       struct super_block *sb = inode->i_sb;
+       unsigned long flags;
+
+       if (!list_empty(&inode->i_wb_list)) {
+               spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
+               if (!list_empty(&inode->i_wb_list)) {
+                       list_del_init(&inode->i_wb_list);
+                       trace_sb_clear_inode_writeback(inode);
+               }
+               spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
+       }
+}
+
 /*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
@@ -1291,6 +1327,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        dirty = inode->i_state & I_DIRTY;
        if (inode->i_state & I_DIRTY_TIME) {
                if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+                   wbc->sync_mode == WB_SYNC_ALL ||
                    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
                    unlikely(time_after(jiffies,
                                        (inode->dirtied_time_when +
@@ -1771,8 +1808,8 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
  */
 static unsigned long get_nr_dirty_pages(void)
 {
-       return global_page_state(NR_FILE_DIRTY) +
-               global_page_state(NR_UNSTABLE_NFS) +
+       return global_node_page_state(NR_FILE_DIRTY) +
+               global_node_page_state(NR_UNSTABLE_NFS) +
                get_nr_dirty_inodes();
 }
 
@@ -1912,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
        struct backing_dev_info *bdi;
 
+       /*
+        * If we are expecting writeback progress we must submit plugged IO.
+        */
+       if (blk_needs_flush_plug(current))
+               blk_schedule_flush_plug(current);
+
        if (!nr_pages)
                nr_pages = get_nr_dirty_pages();
 
@@ -2154,7 +2197,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  */
 static void wait_sb_inodes(struct super_block *sb)
 {
-       struct inode *inode, *old_inode = NULL;
+       LIST_HEAD(sync_list);
 
        /*
         * We need to be protected against the filesystem going from
@@ -2163,38 +2206,60 @@ static void wait_sb_inodes(struct super_block *sb)
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
        mutex_lock(&sb->s_sync_lock);
-       spin_lock(&sb->s_inode_list_lock);
 
        /*
-        * Data integrity sync. Must wait for all pages under writeback,
-        * because there may have been pages dirtied before our sync
-        * call, but which had writeout started before we write it out.
-        * In which case, the inode may not be on the dirty list, but
-        * we still have to wait for that writeout.
+        * Splice the writeback list onto a temporary list to avoid waiting on
+        * inodes that have started writeback after this point.
+        *
+        * Use rcu_read_lock() to keep the inodes around until we have a
+        * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
+        * the local list because inodes can be dropped from either by writeback
+        * completion.
+        */
+       rcu_read_lock();
+       spin_lock_irq(&sb->s_inode_wblist_lock);
+       list_splice_init(&sb->s_inodes_wb, &sync_list);
+
+       /*
+        * Data integrity sync. Must wait for all pages under writeback, because
+        * there may have been pages dirtied before our sync call, but which had
+        * writeout started before we write it out.  In which case, the inode
+        * may not be on the dirty list, but we still have to wait for that
+        * writeout.
         */
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+       while (!list_empty(&sync_list)) {
+               struct inode *inode = list_first_entry(&sync_list, struct inode,
+                                                      i_wb_list);
                struct address_space *mapping = inode->i_mapping;
 
+               /*
+                * Move each inode back to the wb list before we drop the lock
+                * to preserve consistency between i_wb_list and the mapping
+                * writeback tag. Writeback completion is responsible for
+                * removing the inode from either list once the writeback tag
+                * is cleared.
+                */
+               list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
+
+               /*
+                * The mapping can appear untagged while still on-list since we
+                * do not have the mapping lock. Skip it here, wb completion
+                * will remove it.
+                */
+               if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+                       continue;
+
+               spin_unlock_irq(&sb->s_inode_wblist_lock);
+
                spin_lock(&inode->i_lock);
-               if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-                   (mapping->nrpages == 0)) {
+               if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
                        spin_unlock(&inode->i_lock);
+
+                       spin_lock_irq(&sb->s_inode_wblist_lock);
                        continue;
                }
                __iget(inode);
                spin_unlock(&inode->i_lock);
-               spin_unlock(&sb->s_inode_list_lock);
-
-               /*
-                * We hold a reference to 'inode' so it couldn't have been
-                * removed from s_inodes list while we dropped the
-                * s_inode_list_lock.  We cannot iput the inode now as we can
-                * be holding the last reference and we cannot iput it under
-                * s_inode_list_lock. So we keep the reference and iput it
-                * later.
-                */
-               iput(old_inode);
-               old_inode = inode;
+               rcu_read_unlock();
 
                /*
                 * We keep the error status of individual mapping so that
@@ -2205,10 +2270,13 @@ static void wait_sb_inodes(struct super_block *sb)
 
                cond_resched();
 
-               spin_lock(&sb->s_inode_list_lock);
+               iput(inode);
+
+               rcu_read_lock();
+               spin_lock_irq(&sb->s_inode_wblist_lock);
        }
-       spin_unlock(&sb->s_inode_list_lock);
-       iput(old_inode);
+       spin_unlock_irq(&sb->s_inode_wblist_lock);
+       rcu_read_unlock();
        mutex_unlock(&sb->s_sync_lock);
 }