iwlwifi: make configuration structs smaller
[cascardo/linux.git] / mm / filemap.c
index 23edcce..f2479af 100644 (file)
  *    ->tree_lock              (page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock                (page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock          (page_remove_rmap->set_page_dirty)
- *    ->memcg->move_lock       (page_remove_rmap->mem_cgroup_begin_page_stat)
+ *    ->memcg->move_lock       (page_remove_rmap->lock_page_memcg)
  *    bdi.wb->list_lock                (zap_pte_range->set_page_dirty)
  *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
  *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
@@ -176,11 +176,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
 /*
  * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe.  The caller must hold the mapping's tree_lock and
- * mem_cgroup_begin_page_stat().
+ * is safe.  The caller must hold the mapping's tree_lock.
  */
-void __delete_from_page_cache(struct page *page, void *shadow,
-                             struct mem_cgroup *memcg)
+void __delete_from_page_cache(struct page *page, void *shadow)
 {
        struct address_space *mapping = page->mapping;
 
@@ -195,6 +193,30 @@ void __delete_from_page_cache(struct page *page, void *shadow,
        else
                cleancache_invalidate_page(mapping, page);
 
+       VM_BUG_ON_PAGE(page_mapped(page), page);
+       if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
+               int mapcount;
+
+               pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
+                        current->comm, page_to_pfn(page));
+               dump_page(page, "still mapped when deleted");
+               dump_stack();
+               add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+               mapcount = page_mapcount(page);
+               if (mapping_exiting(mapping) &&
+                   page_count(page) >= mapcount + 2) {
+                       /*
+                        * All vmas have already been torn down, so it's
+                        * a good bet that actually the page is unmapped,
+                        * and we'd prefer not to leak it: if we're wrong,
+                        * some other bad page check should catch it later.
+                        */
+                       page_mapcount_reset(page);
+                       atomic_sub(mapcount, &page->_count);
+               }
+       }
+
        page_cache_tree_delete(mapping, page, shadow);
 
        page->mapping = NULL;
@@ -205,7 +227,6 @@ void __delete_from_page_cache(struct page *page, void *shadow,
                __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
-       VM_BUG_ON_PAGE(page_mapped(page), page);
 
        /*
         * At this point page must be either written or cleaned by truncate.
@@ -216,8 +237,7 @@ void __delete_from_page_cache(struct page *page, void *shadow,
         * anyway will be cleared before returning page into buddy allocator.
         */
        if (WARN_ON_ONCE(PageDirty(page)))
-               account_page_cleaned(page, mapping, memcg,
-                                    inode_to_wb(mapping->host));
+               account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
 }
 
 /**
@@ -231,7 +251,6 @@ void __delete_from_page_cache(struct page *page, void *shadow,
 void delete_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
-       struct mem_cgroup *memcg;
        unsigned long flags;
 
        void (*freepage)(struct page *);
@@ -240,15 +259,13 @@ void delete_from_page_cache(struct page *page)
 
        freepage = mapping->a_ops->freepage;
 
-       memcg = mem_cgroup_begin_page_stat(page);
        spin_lock_irqsave(&mapping->tree_lock, flags);
-       __delete_from_page_cache(page, NULL, memcg);
+       __delete_from_page_cache(page, NULL);
        spin_unlock_irqrestore(&mapping->tree_lock, flags);
-       mem_cgroup_end_page_stat(memcg);
 
        if (freepage)
                freepage(page);
-       page_cache_release(page);
+       put_page(page);
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -335,8 +352,8 @@ EXPORT_SYMBOL(filemap_flush);
 static int __filemap_fdatawait_range(struct address_space *mapping,
                                     loff_t start_byte, loff_t end_byte)
 {
-       pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
-       pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
+       pgoff_t index = start_byte >> PAGE_SHIFT;
+       pgoff_t end = end_byte >> PAGE_SHIFT;
        struct pagevec pvec;
        int nr_pages;
        int ret = 0;
@@ -446,7 +463,8 @@ int filemap_write_and_wait(struct address_space *mapping)
 {
        int err = 0;
 
-       if (mapping->nrpages) {
+       if ((!dax_mapping(mapping) && mapping->nrpages) ||
+           (dax_mapping(mapping) && mapping->nrexceptional)) {
                err = filemap_fdatawrite(mapping);
                /*
                 * Even if the above returned error, the pages may be
@@ -482,13 +500,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
        int err = 0;
 
-       if (dax_mapping(mapping) && mapping->nrexceptional) {
-               err = dax_writeback_mapping_range(mapping, lstart, lend);
-               if (err)
-                       return err;
-       }
-
-       if (mapping->nrpages) {
+       if ((!dax_mapping(mapping) && mapping->nrpages) ||
+           (dax_mapping(mapping) && mapping->nrexceptional)) {
                err = __filemap_fdatawrite_range(mapping, lstart, lend,
                                                 WB_SYNC_ALL);
                /* See comment of filemap_write_and_wait() */
@@ -532,19 +545,17 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
        if (!error) {
                struct address_space *mapping = old->mapping;
                void (*freepage)(struct page *);
-               struct mem_cgroup *memcg;
                unsigned long flags;
 
                pgoff_t offset = old->index;
                freepage = mapping->a_ops->freepage;
 
-               page_cache_get(new);
+               get_page(new);
                new->mapping = mapping;
                new->index = offset;
 
-               memcg = mem_cgroup_begin_page_stat(old);
                spin_lock_irqsave(&mapping->tree_lock, flags);
-               __delete_from_page_cache(old, NULL, memcg);
+               __delete_from_page_cache(old, NULL);
                error = radix_tree_insert(&mapping->page_tree, offset, new);
                BUG_ON(error);
                mapping->nrpages++;
@@ -557,12 +568,11 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                if (PageSwapBacked(new))
                        __inc_zone_page_state(new, NR_SHMEM);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
-               mem_cgroup_end_page_stat(memcg);
-               mem_cgroup_replace_page(old, new);
+               mem_cgroup_migrate(old, new);
                radix_tree_preload_end();
                if (freepage)
                        freepage(old);
-               page_cache_release(old);
+               put_page(old);
        }
 
        return error;
@@ -576,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
        void **slot;
        int error;
 
-       error = __radix_tree_create(&mapping->page_tree, page->index,
+       error = __radix_tree_create(&mapping->page_tree, page->index, 0,
                                    &node, &slot);
        if (error)
                return error;
@@ -641,7 +651,7 @@ static int __add_to_page_cache_locked(struct page *page,
                return error;
        }
 
-       page_cache_get(page);
+       get_page(page);
        page->mapping = mapping;
        page->index = offset;
 
@@ -665,7 +675,7 @@ err_insert:
        spin_unlock_irq(&mapping->tree_lock);
        if (!huge)
                mem_cgroup_cancel_charge(page, memcg, false);
-       page_cache_release(page);
+       put_page(page);
        return error;
 }
 
@@ -1073,7 +1083,7 @@ repeat:
                 * include/linux/pagemap.h for details.
                 */
                if (unlikely(page != *pagep)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
        }
@@ -1111,7 +1121,7 @@ repeat:
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
                VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1158,7 +1168,7 @@ repeat:
        if (fgp_flags & FGP_LOCK) {
                if (fgp_flags & FGP_NOWAIT) {
                        if (!trylock_page(page)) {
-                               page_cache_release(page);
+                               put_page(page);
                                return NULL;
                        }
                } else {
@@ -1168,7 +1178,7 @@ repeat:
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
                VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1199,7 +1209,7 @@ no_page:
                err = add_to_page_cache_lru(page, mapping, offset,
                                gfp_mask & GFP_RECLAIM_MASK);
                if (unlikely(err)) {
-                       page_cache_release(page);
+                       put_page(page);
                        page = NULL;
                        if (err == -EEXIST)
                                goto repeat;
@@ -1245,7 +1255,6 @@ unsigned find_get_entries(struct address_space *mapping,
                return 0;
 
        rcu_read_lock();
-restart:
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
                struct page *page;
 repeat:
@@ -1253,8 +1262,10 @@ repeat:
                if (unlikely(!page))
                        continue;
                if (radix_tree_exception(page)) {
-                       if (radix_tree_deref_retry(page))
-                               goto restart;
+                       if (radix_tree_deref_retry(page)) {
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
+                       }
                        /*
                         * A shadow entry of a recently evicted page, a swap
                         * entry from shmem/tmpfs or a DAX entry.  Return it
@@ -1267,7 +1278,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 export:
@@ -1307,7 +1318,6 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
                return 0;
 
        rcu_read_lock();
-restart:
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
                struct page *page;
 repeat:
@@ -1317,13 +1327,8 @@ repeat:
 
                if (radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
-                               /*
-                                * Transient condition which can only trigger
-                                * when entry at index 0 moves out of or back
-                                * to root: none yet gotten, safe to restart.
-                                */
-                               WARN_ON(iter.index);
-                               goto restart;
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
                        }
                        /*
                         * A shadow entry of a recently evicted page,
@@ -1338,7 +1343,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1374,7 +1379,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                return 0;
 
        rcu_read_lock();
-restart:
        radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
                struct page *page;
 repeat:
@@ -1385,12 +1389,8 @@ repeat:
 
                if (radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
-                               /*
-                                * Transient condition which can only trigger
-                                * when entry at index 0 moves out of or back
-                                * to root: none yet gotten, safe to restart.
-                                */
-                               goto restart;
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
                        }
                        /*
                         * A shadow entry of a recently evicted page,
@@ -1405,7 +1405,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1415,7 +1415,7 @@ repeat:
                 * negatives, which is just confusing to the caller.
                 */
                if (page->mapping == NULL || page->index != iter.index) {
-                       page_cache_release(page);
+                       put_page(page);
                        break;
                }
 
@@ -1450,7 +1450,6 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                return 0;
 
        rcu_read_lock();
-restart:
        radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                   &iter, *index, tag) {
                struct page *page;
@@ -1461,12 +1460,8 @@ repeat:
 
                if (radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
-                               /*
-                                * Transient condition which can only trigger
-                                * when entry at index 0 moves out of or back
-                                * to root: none yet gotten, safe to restart.
-                                */
-                               goto restart;
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
                        }
                        /*
                         * A shadow entry of a recently evicted page.
@@ -1487,7 +1482,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -1529,7 +1524,6 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
                return 0;
 
        rcu_read_lock();
-restart:
        radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                   &iter, start, tag) {
                struct page *page;
@@ -1539,12 +1533,8 @@ repeat:
                        continue;
                if (radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
-                               /*
-                                * Transient condition which can only trigger
-                                * when entry at index 0 moves out of or back
-                                * to root: none yet gotten, safe to restart.
-                                */
-                               goto restart;
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
                        }
 
                        /*
@@ -1559,7 +1549,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 export:
@@ -1620,11 +1610,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
        unsigned int prev_offset;
        int error = 0;
 
-       index = *ppos >> PAGE_CACHE_SHIFT;
-       prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
-       prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-       last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-       offset = *ppos & ~PAGE_CACHE_MASK;
+       index = *ppos >> PAGE_SHIFT;
+       prev_index = ra->prev_pos >> PAGE_SHIFT;
+       prev_offset = ra->prev_pos & (PAGE_SIZE-1);
+       last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+       offset = *ppos & ~PAGE_MASK;
 
        for (;;) {
                struct page *page;
@@ -1649,7 +1639,16 @@ find_page:
                                        index, last_index - index);
                }
                if (!PageUptodate(page)) {
-                       if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+                       /*
+                        * See comment in do_read_cache_page on why
+                        * wait_on_page_locked is used to avoid unnecessarily
+                        * serialisations and why it's safe.
+                        */
+                       wait_on_page_locked_killable(page);
+                       if (PageUptodate(page))
+                               goto page_ok;
+
+                       if (inode->i_blkbits == PAGE_SHIFT ||
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
                        if (!trylock_page(page))
@@ -1673,18 +1672,18 @@ page_ok:
                 */
 
                isize = i_size_read(inode);
-               end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+               end_index = (isize - 1) >> PAGE_SHIFT;
                if (unlikely(!isize || index > end_index)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto out;
                }
 
                /* nr is the maximum number of bytes to copy from this page */
-               nr = PAGE_CACHE_SIZE;
+               nr = PAGE_SIZE;
                if (index == end_index) {
-                       nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+                       nr = ((isize - 1) & ~PAGE_MASK) + 1;
                        if (nr <= offset) {
-                               page_cache_release(page);
+                               put_page(page);
                                goto out;
                        }
                }
@@ -1712,11 +1711,11 @@ page_ok:
 
                ret = copy_page_to_iter(page, offset, nr, iter);
                offset += ret;
-               index += offset >> PAGE_CACHE_SHIFT;
-               offset &= ~PAGE_CACHE_MASK;
+               index += offset >> PAGE_SHIFT;
+               offset &= ~PAGE_MASK;
                prev_offset = offset;
 
-               page_cache_release(page);
+               put_page(page);
                written += ret;
                if (!iov_iter_count(iter))
                        goto out;
@@ -1736,7 +1735,7 @@ page_not_up_to_date_locked:
                /* Did it get truncated before we got the lock? */
                if (!page->mapping) {
                        unlock_page(page);
-                       page_cache_release(page);
+                       put_page(page);
                        continue;
                }
 
@@ -1758,7 +1757,7 @@ readpage:
 
                if (unlikely(error)) {
                        if (error == AOP_TRUNCATED_PAGE) {
-                               page_cache_release(page);
+                               put_page(page);
                                error = 0;
                                goto find_page;
                        }
@@ -1775,7 +1774,7 @@ readpage:
                                         * invalidate_mapping_pages got it
                                         */
                                        unlock_page(page);
-                                       page_cache_release(page);
+                                       put_page(page);
                                        goto find_page;
                                }
                                unlock_page(page);
@@ -1790,7 +1789,7 @@ readpage:
 
 readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
-               page_cache_release(page);
+               put_page(page);
                goto out;
 
 no_cached_page:
@@ -1806,7 +1805,7 @@ no_cached_page:
                error = add_to_page_cache_lru(page, mapping, index,
                                mapping_gfp_constraint(mapping, GFP_KERNEL));
                if (error) {
-                       page_cache_release(page);
+                       put_page(page);
                        if (error == -EEXIST) {
                                error = 0;
                                goto find_page;
@@ -1818,10 +1817,10 @@ no_cached_page:
 
 out:
        ra->prev_pos = prev_index;
-       ra->prev_pos <<= PAGE_CACHE_SHIFT;
+       ra->prev_pos <<= PAGE_SHIFT;
        ra->prev_pos |= prev_offset;
 
-       *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
+       *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
        file_accessed(filp);
        return written ? written : error;
 }
@@ -1841,15 +1840,16 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t retval = 0;
        loff_t *ppos = &iocb->ki_pos;
        loff_t pos = *ppos;
+       size_t count = iov_iter_count(iter);
+
+       if (!count)
+               goto out; /* skip atime */
 
        if (iocb->ki_flags & IOCB_DIRECT) {
                struct address_space *mapping = file->f_mapping;
                struct inode *inode = mapping->host;
-               size_t count = iov_iter_count(iter);
                loff_t size;
 
-               if (!count)
-                       goto out; /* skip atime */
                size = i_size_read(inode);
                retval = filemap_write_and_wait_range(mapping, pos,
                                        pos + count - 1);
@@ -1912,7 +1912,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
                else if (ret == -EEXIST)
                        ret = 0; /* losing race to add is OK */
 
-               page_cache_release(page);
+               put_page(page);
 
        } while (ret == AOP_TRUNCATED_PAGE);
 
@@ -2022,8 +2022,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size;
        int ret = 0;
 
-       size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-       if (offset >= size >> PAGE_CACHE_SHIFT)
+       size = round_up(i_size_read(inode), PAGE_SIZE);
+       if (offset >= size >> PAGE_SHIFT)
                return VM_FAULT_SIGBUS;
 
        /*
@@ -2049,7 +2049,7 @@ retry_find:
        }
 
        if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
-               page_cache_release(page);
+               put_page(page);
                return ret | VM_FAULT_RETRY;
        }
 
@@ -2072,10 +2072,10 @@ retry_find:
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
         */
-       size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-       if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
+       size = round_up(i_size_read(inode), PAGE_SIZE);
+       if (unlikely(offset >= size >> PAGE_SHIFT)) {
                unlock_page(page);
-               page_cache_release(page);
+               put_page(page);
                return VM_FAULT_SIGBUS;
        }
 
@@ -2120,7 +2120,7 @@ page_not_uptodate:
                if (!PageUptodate(page))
                        error = -EIO;
        }
-       page_cache_release(page);
+       put_page(page);
 
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
@@ -2152,10 +2152,11 @@ repeat:
                if (unlikely(!page))
                        goto next;
                if (radix_tree_exception(page)) {
-                       if (radix_tree_deref_retry(page))
-                               break;
-                       else
-                               goto next;
+                       if (radix_tree_deref_retry(page)) {
+                               slot = radix_tree_iter_retry(&iter);
+                               continue;
+                       }
+                       goto next;
                }
 
                if (!page_cache_get_speculative(page))
@@ -2163,7 +2164,7 @@ repeat:
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       page_cache_release(page);
+                       put_page(page);
                        goto repeat;
                }
 
@@ -2177,8 +2178,8 @@ repeat:
                if (page->mapping != mapping || !PageUptodate(page))
                        goto unlock;
 
-               size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
-               if (page->index >= size >> PAGE_CACHE_SHIFT)
+               size = round_up(i_size_read(mapping->host), PAGE_SIZE);
+               if (page->index >= size >> PAGE_SHIFT)
                        goto unlock;
 
                pte = vmf->pte + page->index - vmf->pgoff;
@@ -2194,7 +2195,7 @@ repeat:
 unlock:
                unlock_page(page);
 skip:
-               page_cache_release(page);
+               put_page(page);
 next:
                if (iter.index == vmf->max_pgoff)
                        break;
@@ -2277,14 +2278,14 @@ static struct page *wait_on_page_read(struct page *page)
        if (!IS_ERR(page)) {
                wait_on_page_locked(page);
                if (!PageUptodate(page)) {
-                       page_cache_release(page);
+                       put_page(page);
                        page = ERR_PTR(-EIO);
                }
        }
        return page;
 }
 
-static struct page *__read_cache_page(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data,
@@ -2300,59 +2301,80 @@ repeat:
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, gfp);
                if (unlikely(err)) {
-                       page_cache_release(page);
+                       put_page(page);
                        if (err == -EEXIST)
                                goto repeat;
                        /* Presumably ENOMEM for radix tree node */
                        return ERR_PTR(err);
                }
+
+filler:
                err = filler(data, page);
                if (err < 0) {
-                       page_cache_release(page);
-                       page = ERR_PTR(err);
-               } else {
-                       page = wait_on_page_read(page);
+                       put_page(page);
+                       return ERR_PTR(err);
                }
-       }
-       return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
-                               pgoff_t index,
-                               int (*filler)(void *, struct page *),
-                               void *data,
-                               gfp_t gfp)
 
-{
-       struct page *page;
-       int err;
+               page = wait_on_page_read(page);
+               if (IS_ERR(page))
+                       return page;
+               goto out;
+       }
+       if (PageUptodate(page))
+               goto out;
 
-retry:
-       page = __read_cache_page(mapping, index, filler, data, gfp);
-       if (IS_ERR(page))
-               return page;
+       /*
+        * Page is not up to date and may be locked due one of the following
+        * case a: Page is being filled and the page lock is held
+        * case b: Read/write error clearing the page uptodate status
+        * case c: Truncation in progress (page locked)
+        * case d: Reclaim in progress
+        *
+        * Case a, the page will be up to date when the page is unlocked.
+        *    There is no need to serialise on the page lock here as the page
+        *    is pinned so the lock gives no additional protection. Even if the
+        *    the page is truncated, the data is still valid if PageUptodate as
+        *    it's a race vs truncate race.
+        * Case b, the page will not be up to date
+        * Case c, the page may be truncated but in itself, the data may still
+        *    be valid after IO completes as it's a read vs truncate race. The
+        *    operation must restart if the page is not uptodate on unlock but
+        *    otherwise serialising on page lock to stabilise the mapping gives
+        *    no additional guarantees to the caller as the page lock is
+        *    released before return.
+        * Case d, similar to truncation. If reclaim holds the page lock, it
+        *    will be a race with remove_mapping that determines if the mapping
+        *    is valid on unlock but otherwise the data is valid and there is
+        *    no need to serialise with page lock.
+        *
+        * As the page lock gives no additional guarantee, we optimistically
+        * wait on the page to be unlocked and check if it's up to date and
+        * use the page if it is. Otherwise, the page lock is required to
+        * distinguish between the different cases. The motivation is that we
+        * avoid spurious serialisations and wakeups when multiple processes
+        * wait on the same page for IO to complete.
+        */
+       wait_on_page_locked(page);
        if (PageUptodate(page))
                goto out;
 
+       /* Distinguish between all the cases under the safety of the lock */
        lock_page(page);
+
+       /* Case c or d, restart the operation */
        if (!page->mapping) {
                unlock_page(page);
-               page_cache_release(page);
-               goto retry;
+               put_page(page);
+               goto repeat;
        }
+
+       /* Someone else locked and filled the page in a very small window */
        if (PageUptodate(page)) {
                unlock_page(page);
                goto out;
        }
-       err = filler(data, page);
-       if (err < 0) {
-               page_cache_release(page);
-               return ERR_PTR(err);
-       } else {
-               page = wait_on_page_read(page);
-               if (IS_ERR(page))
-                       return page;
-       }
+       goto filler;
+
 out:
        mark_page_accessed(page);
        return page;
@@ -2489,7 +2511,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
        struct iov_iter data;
 
        write_len = iov_iter_count(from);
-       end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
+       end = (pos + write_len - 1) >> PAGE_SHIFT;
 
        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
        if (written)
@@ -2503,7 +2525,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
         */
        if (mapping->nrpages) {
                written = invalidate_inode_pages2_range(mapping,
-                                       pos >> PAGE_CACHE_SHIFT, end);
+                                       pos >> PAGE_SHIFT, end);
                /*
                 * If a page can not be invalidated, return 0 to fall back
                 * to buffered write.
@@ -2528,7 +2550,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
         */
        if (mapping->nrpages) {
                invalidate_inode_pages2_range(mapping,
-                                             pos >> PAGE_CACHE_SHIFT, end);
+                                             pos >> PAGE_SHIFT, end);
        }
 
        if (written > 0) {
@@ -2589,8 +2611,8 @@ ssize_t generic_perform_write(struct file *file,
                size_t copied;          /* Bytes copied from user */
                void *fsdata;
 
-               offset = (pos & (PAGE_CACHE_SIZE - 1));
-               bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+               offset = (pos & (PAGE_SIZE - 1));
+               bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_count(i));
 
 again:
@@ -2643,7 +2665,7 @@ again:
                         * because not all segments in the iov can be copied at
                         * once without a pagefault.
                         */
-                       bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_single_seg_count(i));
                        goto again;
                }
@@ -2730,8 +2752,8 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                        iocb->ki_pos = endbyte + 1;
                        written += status;
                        invalidate_mapping_pages(mapping,
-                                                pos >> PAGE_CACHE_SHIFT,
-                                                endbyte >> PAGE_CACHE_SHIFT);
+                                                pos >> PAGE_SHIFT,
+                                                endbyte >> PAGE_SHIFT);
                } else {
                        /*
                         * We don't know how much we wrote, so just return