xfs: verify btree blocks as they are read from disk
[cascardo/linux.git] / fs / xfs / xfs_bmap.c
index 848ffa7..9ae7aba 100644 (file)
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
         * Normal allocation, done through xfs_alloc_vextent.
         */
        tryagain = isaligned = 0;
+       memset(&args, 0, sizeof(args));
        args.tp = ap->tp;
        args.mp = mp;
        args.fsbno = ap->blkno;
@@ -2661,8 +2662,9 @@ xfs_bmap_btree_to_extents(
        if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
                return error;
 #endif
-       if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
-                       XFS_BMAP_BTREE_REF)))
+       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+                               xfs_bmbt_read_verify);
+       if (error)
                return error;
        cblock = XFS_BUF_TO_BLOCK(cbp);
        if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
@@ -3082,6 +3084,7 @@ xfs_bmap_extents_to_btree(
         * Convert to a btree with two levels, one record in root.
         */
        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+       memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = mp;
        args.firstblock = *firstblock;
@@ -3237,6 +3240,7 @@ xfs_bmap_local_to_extents(
                xfs_buf_t       *bp;    /* buffer for extent block */
                xfs_bmbt_rec_host_t *ep;/* extent record pointer */
 
+               memset(&args, 0, sizeof(args));
                args.tp = tp;
                args.mp = ip->i_mount;
                args.firstblock = *firstblock;
@@ -4075,8 +4079,9 @@ xfs_bmap_read_extents(
         * pointer (leftmost) at each level.
         */
        while (level-- > 0) {
-               if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, xfs_bmbt_read_verify);
+               if (error)
                        return error;
                block = XFS_BUF_TO_BLOCK(bp);
                XFS_WANT_CORRUPTED_GOTO(
@@ -4121,7 +4126,8 @@ xfs_bmap_read_extents(
                 */
                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                if (nextbno != NULLFSBLOCK)
-                       xfs_btree_reada_bufl(mp, nextbno, 1);
+                       xfs_btree_reada_bufl(mp, nextbno, 1,
+                                            xfs_bmbt_read_verify);
                /*
                 * Copy records into the extent records.
                 */
@@ -4153,8 +4159,9 @@ xfs_bmap_read_extents(
                 */
                if (bno == NULLFSBLOCK)
                        break;
-               if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, xfs_bmbt_read_verify);
+               if (error)
                        return error;
                block = XFS_BUF_TO_BLOCK(bp);
        }
@@ -4616,12 +4623,11 @@ xfs_bmapi_delay(
 
 
 STATIC int
-xfs_bmapi_allocate(
-       struct xfs_bmalloca     *bma,
-       int                     flags)
+__xfs_bmapi_allocate(
+       struct xfs_bmalloca     *bma)
 {
        struct xfs_mount        *mp = bma->ip->i_mount;
-       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+       int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
                                                XFS_ATTR_FORK : XFS_DATA_FORK;
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
        int                     tmp_logflags = 0;
@@ -4654,24 +4660,27 @@ xfs_bmapi_allocate(
         * Indicate if this is the first user data in the file, or just any
         * user data.
         */
-       if (!(flags & XFS_BMAPI_METADATA)) {
+       if (!(bma->flags & XFS_BMAPI_METADATA)) {
                bma->userdata = (bma->offset == 0) ?
                        XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
        }
 
-       bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+       bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
 
        /*
         * Only want to do the alignment at the eof if it is userdata and
         * allocation length is larger than a stripe unit.
         */
        if (mp->m_dalign && bma->length >= mp->m_dalign &&
-           !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+           !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
                error = xfs_bmap_isaeof(bma, whichfork);
                if (error)
                        return error;
        }
 
+       if (bma->flags & XFS_BMAPI_STACK_SWITCH)
+               bma->stack_switch = 1;
+
        error = xfs_bmap_alloc(bma);
        if (error)
                return error;
@@ -4706,7 +4715,7 @@ xfs_bmapi_allocate(
         * A wasdelay extent has been initialized, so shouldn't be flagged
         * as unwritten.
         */
-       if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
            xfs_sb_version_hasextflgbit(&mp->m_sb))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
 
@@ -4734,6 +4743,45 @@ xfs_bmapi_allocate(
        return 0;
 }
 
+/*
+ * Workqueue callback for a deferred extent allocation.  Recovers the
+ * xfs_bmalloca that embeds @work, runs __xfs_bmapi_allocate() on it, stores
+ * the outcome in ->result and then wakes the waiter through ->done.
+ */
+static void
+xfs_bmapi_allocate_worker(
+       struct work_struct      *work)
+{
+       struct xfs_bmalloca     *bma;
+       unsigned long           saved_pflags;
+
+       bma = container_of(work, struct xfs_bmalloca, work);
+
+       /* the allocation runs in transaction context, so flag the task */
+       current_set_flags_nested(&saved_pflags, PF_FSTRANS);
+
+       bma->result = __xfs_bmapi_allocate(bma);
+       complete(bma->done);
+
+       /* only the local flag word is used from here on, not *bma */
+       current_restore_flags_nested(&saved_pflags, PF_FSTRANS);
+}
+
+/*
+ * Some allocation requests come in with little stack left to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ *
+ * When args->stack_switch is set, the request is queued on xfs_alloc_wq and
+ * the caller sleeps until xfs_bmapi_allocate_worker() signals completion.
+ * The return value is then the result the worker stored in args->result;
+ * in the direct case it is the return value of __xfs_bmapi_allocate().
+ */
+int
+xfs_bmapi_allocate(
+       struct xfs_bmalloca     *args)
+{
+       DECLARE_COMPLETION_ONSTACK(done);
+
+       if (!args->stack_switch)
+               return __xfs_bmapi_allocate(args);
+
+       args->done = &done;
+       INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+       queue_work(xfs_alloc_wq, &args->work);
+       wait_for_completion(&done);
+       /*
+        * INIT_WORK_ONSTACK() has to be paired with destroy_work_on_stack()
+        * once the work item is finished, otherwise its debugobjects
+        * tracking is left behind when this function returns.
+        */
+       destroy_work_on_stack(&args->work);
+       return args->result;
+}
+
 STATIC int
 xfs_bmapi_convert_unwritten(
        struct xfs_bmalloca     *bma,
@@ -4919,6 +4967,7 @@ xfs_bmapi_write(
                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
                        bma.wasdel = wasdelay;
                        bma.offset = bno;
+                       bma.flags = flags;
 
                        /*
                         * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4983,7 @@ xfs_bmapi_write(
 
                        ASSERT(len > 0);
                        ASSERT(bma.length > 0);
-                       error = xfs_bmapi_allocate(&bma, flags);
+                       error = xfs_bmapi_allocate(&bma);
                        if (error)
                                goto error0;
                        if (bma.blkno == NULLFSBLOCK)
@@ -5554,7 +5603,7 @@ xfs_getbmap(
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
                if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
-                       error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
+                       error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
                        if (error)
                                goto out_unlock_iolock;
                }
@@ -5823,15 +5872,16 @@ xfs_bmap_check_leaf_extents(
         */
        while (level-- > 0) {
                /* See if buf is in cur first */
+               bp_release = 0;
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
-               if (bp) {
-                       bp_release = 0;
-               } else {
+               if (!bp) {
                        bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               xfs_bmbt_read_verify);
+                       if (error)
+                               goto error_norelse;
                }
-               if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
-                       goto error_norelse;
                block = XFS_BUF_TO_BLOCK(bp);
                XFS_WANT_CORRUPTED_GOTO(
                        xfs_bmap_sanity_check(mp, bp, level),
@@ -5908,15 +5958,16 @@ xfs_bmap_check_leaf_extents(
                if (bno == NULLFSBLOCK)
                        break;
 
+               bp_release = 0;
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
-               if (bp) {
-                       bp_release = 0;
-               } else {
+               if (!bp) {
                        bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               xfs_bmbt_read_verify);
+                       if (error)
+                               goto error_norelse;
                }
-               if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
-                       goto error_norelse;
                block = XFS_BUF_TO_BLOCK(bp);
        }
        if (bp_release) {
@@ -6007,7 +6058,9 @@ xfs_bmap_count_tree(
        struct xfs_btree_block  *block, *nextblock;
        int                     numrecs;
 
-       if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
+       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+                                               xfs_bmbt_read_verify);
+       if (error)
                return error;
        *count += 1;
        block = XFS_BUF_TO_BLOCK(bp);
@@ -6016,8 +6069,10 @@ xfs_bmap_count_tree(
                /* Not at node above leaves, count this level of nodes */
                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                while (nextbno != NULLFSBLOCK) {
-                       if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
-                               0, &nbp, XFS_BMAP_BTREE_REF)))
+                       error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+                                               XFS_BMAP_BTREE_REF,
+                                               xfs_bmbt_read_verify);
+                       if (error)
                                return error;
                        *count += 1;
                        nextblock = XFS_BUF_TO_BLOCK(nbp);
@@ -6046,8 +6101,10 @@ xfs_bmap_count_tree(
                        if (nextbno == NULLFSBLOCK)
                                break;
                        bno = nextbno;
-                       if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF)))
+                       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               xfs_bmbt_read_verify);
+                       if (error)
                                return error;
                        *count += 1;
                        block = XFS_BUF_TO_BLOCK(bp);