xfs: propagate bmap updates to rmapbt
authorDarrick J. Wong <darrick.wong@oracle.com>
Wed, 3 Aug 2016 02:16:05 +0000 (12:16 +1000)
committerDave Chinner <david@fromorbit.com>
Wed, 3 Aug 2016 02:16:05 +0000 (12:16 +1000)
When we map, unmap, or convert an extent in a file's data or attr
fork, schedule a respective update in the rmapbt.  Previous versions
of this patch required a 1:1 correspondence between bmap and rmap,
but this is no longer true as we now have ability to make interval
queries against the rmapbt.

We use the deferred operations code to handle redo operations
atomically and deadlock free.  This plumbs in all five rmap actions
(map, unmap, convert extent, alloc, free); we'll use the first three
now for file data, and reflink will want the last two.  We also add
an error injection site to test log recovery.

Finally, we need to fix the bmap shift extent code to adjust the
rmaps correctly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_error.h
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_rmap.c

index c4b8921..b060bca 100644 (file)
@@ -2179,6 +2179,11 @@ xfs_bmap_add_extent_delay_real(
                ASSERT(0);
        }
 
+       /* add reverse mapping */
+       error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+       if (error)
+               goto done;
+
        /* convert to a btree if necessary */
        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                int     tmp_logflags;   /* partial log flag return val */
@@ -2715,6 +2720,11 @@ xfs_bmap_add_extent_unwritten_real(
                ASSERT(0);
        }
 
+       /* update reverse mappings */
+       error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+       if (error)
+               goto done;
+
        /* convert to a btree if necessary */
        if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
                int     tmp_logflags;   /* partial log flag return val */
@@ -3107,6 +3117,11 @@ xfs_bmap_add_extent_hole_real(
                break;
        }
 
+       /* add reverse mapping */
+       error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+       if (error)
+               goto done;
+
        /* convert to a btree if necessary */
        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                int     tmp_logflags;   /* partial log flag return val */
@@ -5034,6 +5049,14 @@ xfs_bmap_del_extent(
                ++*idx;
                break;
        }
+
+       /* remove reverse mapping */
+       if (!delay) {
+               error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
+               if (error)
+                       goto done;
+       }
+
        /*
         * If we need to, add to list of extents to delete.
         */
@@ -5573,7 +5596,8 @@ xfs_bmse_shift_one(
        struct xfs_bmbt_rec_host        *gotp,
        struct xfs_btree_cur            *cur,
        int                             *logflags,
-       enum shift_direction            direction)
+       enum shift_direction            direction,
+       struct xfs_defer_ops            *dfops)
 {
        struct xfs_ifork                *ifp;
        struct xfs_mount                *mp;
@@ -5621,9 +5645,13 @@ xfs_bmse_shift_one(
                /* check whether to merge the extent or shift it down */
                if (xfs_bmse_can_merge(&adj_irec, &got,
                                       offset_shift_fsb)) {
-                       return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-                                             *current_ext, gotp, adj_irecp,
-                                             cur, logflags);
+                       error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
+                                              *current_ext, gotp, adj_irecp,
+                                              cur, logflags);
+                       if (error)
+                               return error;
+                       adj_irec = got;
+                       goto update_rmap;
                }
        } else {
                startoff = got.br_startoff + offset_shift_fsb;
@@ -5660,9 +5688,10 @@ update_current_ext:
                (*current_ext)--;
        xfs_bmbt_set_startoff(gotp, startoff);
        *logflags |= XFS_ILOG_CORE;
+       adj_irec = got;
        if (!cur) {
                *logflags |= XFS_ILOG_DEXT;
-               return 0;
+               goto update_rmap;
        }
 
        error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
@@ -5672,8 +5701,18 @@ update_current_ext:
        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
        got.br_startoff = startoff;
-       return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
-                              got.br_blockcount, got.br_state);
+       error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
+                       got.br_blockcount, got.br_state);
+       if (error)
+               return error;
+
+update_rmap:
+       /* update reverse mapping */
+       error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
+       if (error)
+               return error;
+       adj_irec.br_startoff = startoff;
+       return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
 }
 
 /*
@@ -5801,7 +5840,7 @@ xfs_bmap_shift_extents(
        while (nexts++ < num_exts) {
                error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
                                           &current_ext, gotp, cur, &logflags,
-                                          direction);
+                                          direction, dfops);
                if (error)
                        goto del_cursor;
                /*
index e8ce97f..73d0540 100644 (file)
@@ -36,6 +36,8 @@
 #include "xfs_trace.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
+#include "xfs_inode.h"
 
 /*
  * Lookup the first record less than or equal to [bno, len, owner, offset]
@@ -1138,3 +1140,260 @@ xfs_rmap_query_range(
        return xfs_btree_query_range(cur, &low_brec, &high_brec,
                        xfs_rmap_query_range_helper, &query);
 }
+
+/* Clean up after calling xfs_rmap_finish_one. */
+void
+xfs_rmap_finish_one_cleanup(
+       struct xfs_trans        *tp,
+       struct xfs_btree_cur    *rcur,
+       int                     error)
+{
+       struct xfs_buf          *agbp;
+
+       if (rcur == NULL)
+               return;
+       agbp = rcur->bc_private.a.agbp;
+       xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       if (error)
+               xfs_trans_brelse(tp, agbp);
+}
+
+/*
+ * Process one of the deferred rmap operations.  We pass back the
+ * btree cursor to maintain our lock on the rmapbt between calls.
+ * This saves time and eliminates a buffer deadlock between the
+ * superblock and the AGF because we'll always grab them in the same
+ * order.
+ */
+int
+xfs_rmap_finish_one(
+       struct xfs_trans                *tp,
+       enum xfs_rmap_intent_type       type,
+       __uint64_t                      owner,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
+{
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_btree_cur            *rcur;
+       struct xfs_buf                  *agbp = NULL;
+       int                             error = 0;
+       xfs_agnumber_t                  agno;
+       struct xfs_owner_info           oinfo;
+       xfs_agblock_t                   bno;
+       bool                            unwritten;
+
+       agno = XFS_FSB_TO_AGNO(mp, startblock);
+       ASSERT(agno != NULLAGNUMBER);
+       bno = XFS_FSB_TO_AGBNO(mp, startblock);
+
+       trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
+                       startoff, blockcount, state);
+
+       if (XFS_TEST_ERROR(false, mp,
+                       XFS_ERRTAG_RMAP_FINISH_ONE,
+                       XFS_RANDOM_RMAP_FINISH_ONE))
+               return -EIO;
+
+       /*
+        * If we haven't gotten a cursor or the cursor AG doesn't match
+        * the startblock, get one now.
+        */
+       rcur = *pcur;
+       if (rcur != NULL && rcur->bc_private.a.agno != agno) {
+               xfs_rmap_finish_one_cleanup(tp, rcur, 0);
+               rcur = NULL;
+               *pcur = NULL;
+       }
+       if (rcur == NULL) {
+               /*
+                * Refresh the freelist before we start changing the
+                * rmapbt, because a shape change could cause us to
+                * allocate blocks.
+                */
+               error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
+               if (error)
+                       return error;
+               if (!agbp)
+                       return -EFSCORRUPTED;
+
+               rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+               if (!rcur) {
+                       error = -ENOMEM;
+                       goto out_cur;
+               }
+       }
+       *pcur = rcur;
+
+       xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
+       unwritten = state == XFS_EXT_UNWRITTEN;
+       bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);
+
+       switch (type) {
+       case XFS_RMAP_ALLOC:
+       case XFS_RMAP_MAP:
+               error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
+               break;
+       case XFS_RMAP_FREE:
+       case XFS_RMAP_UNMAP:
+               error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
+                               &oinfo);
+               break;
+       case XFS_RMAP_CONVERT:
+               error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
+                               &oinfo);
+               break;
+       default:
+               ASSERT(0);
+               error = -EFSCORRUPTED;
+       }
+       return error;
+
+out_cur:
+       xfs_trans_brelse(tp, agbp);
+
+       return error;
+}
+
+/*
+ * Don't defer an rmap if we aren't an rmap filesystem.
+ */
+static bool
+xfs_rmap_update_is_needed(
+       struct xfs_mount        *mp)
+{
+       return xfs_sb_version_hasrmapbt(&mp->m_sb);
+}
+
+/*
+ * Record a rmap intent; the list is kept sorted first by AG and then by
+ * increasing age.
+ */
+static int
+__xfs_rmap_add(
+       struct xfs_mount                *mp,
+       struct xfs_defer_ops            *dfops,
+       enum xfs_rmap_intent_type       type,
+       __uint64_t                      owner,
+       int                             whichfork,
+       struct xfs_bmbt_irec            *bmap)
+{
+       struct xfs_rmap_intent  *ri;
+
+       trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
+                       type,
+                       XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
+                       owner, whichfork,
+                       bmap->br_startoff,
+                       bmap->br_blockcount,
+                       bmap->br_state);
+
+       ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
+       INIT_LIST_HEAD(&ri->ri_list);
+       ri->ri_type = type;
+       ri->ri_owner = owner;
+       ri->ri_whichfork = whichfork;
+       ri->ri_bmap = *bmap;
+
+       xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
+       return 0;
+}
+
+/* Map an extent into a file. */
+int
+xfs_rmap_map_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
+                       whichfork, PREV);
+}
+
+/* Unmap an extent out of a file. */
+int
+xfs_rmap_unmap_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
+                       whichfork, PREV);
+}
+
+/* Convert a data fork extent from unwritten to real or vice versa. */
+int
+xfs_rmap_convert_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
+                       whichfork, PREV);
+}
+
+/* Schedule the creation of an rmap for non-file data. */
+int
+xfs_rmap_alloc_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       __uint64_t              owner)
+{
+       struct xfs_bmbt_irec    bmap;
+
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+       bmap.br_blockcount = len;
+       bmap.br_startoff = 0;
+       bmap.br_state = XFS_EXT_NORM;
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
+                       XFS_DATA_FORK, &bmap);
+}
+
+/* Schedule the deletion of an rmap for non-file data. */
+int
+xfs_rmap_free_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       __uint64_t              owner)
+{
+       struct xfs_bmbt_irec    bmap;
+
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+       bmap.br_blockcount = len;
+       bmap.br_startoff = 0;
+       bmap.br_state = XFS_EXT_NORM;
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
+                       XFS_DATA_FORK, &bmap);
+}
index c4b1133..71cf99a 100644 (file)
@@ -182,4 +182,28 @@ struct xfs_rmap_intent {
        struct xfs_bmbt_irec                    ri_bmap;
 };
 
+/* functions for updating the rmapbt based on bmbt map/unmap operations */
+int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+               __uint64_t owner);
+int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+               __uint64_t owner);
+
+void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
+               struct xfs_btree_cur *rcur, int error);
+int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
+               __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+               xfs_fsblock_t startblock, xfs_filblks_t blockcount,
+               xfs_exntst_t state, struct xfs_btree_cur **pcur);
+
 #endif /* __XFS_RMAP_H__ */
index 3bbe46b..a819d7b 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_log.h"
+#include "xfs_rmap_btree.h"
 
 /* Kernel only BMAP related definitions and functions */
 
index da6f951..3d22470 100644 (file)
@@ -91,7 +91,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_ERRTAG_DIOWRITE_IOERR                      20
 #define XFS_ERRTAG_BMAPIFORMAT                         21
 #define XFS_ERRTAG_FREE_EXTENT                         22
-#define XFS_ERRTAG_MAX                                 23
+#define XFS_ERRTAG_RMAP_FINISH_ONE                     23
+#define XFS_ERRTAG_MAX                                 24
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -119,6 +120,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_DIOWRITE_IOERR                      (XFS_RANDOM_DEFAULT/10)
 #define        XFS_RANDOM_BMAPIFORMAT                          XFS_RANDOM_DEFAULT
 #define XFS_RANDOM_FREE_EXTENT                         1
+#define XFS_RANDOM_RMAP_FINISH_ONE                     1
 
 #ifdef DEBUG
 extern int xfs_error_test_active;
index fecd1e4..6d6cc3b 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_buf_item.h"
 #include "xfs_rmap_item.h"
 #include "xfs_log.h"
+#include "xfs_rmap.h"
 
 
 kmem_zone_t    *xfs_rui_zone;
@@ -473,6 +475,12 @@ xfs_rui_recover(
        struct xfs_map_extent           *rmap;
        xfs_fsblock_t                   startblock_fsb;
        bool                            op_ok;
+       struct xfs_rud_log_item         *rudp;
+       enum xfs_rmap_intent_type       type;
+       int                             whichfork;
+       xfs_exntst_t                    state;
+       struct xfs_trans                *tp;
+       struct xfs_btree_cur            *rcur = NULL;
 
        ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
 
@@ -512,8 +520,53 @@ xfs_rui_recover(
                }
        }
 
-       /* XXX: do nothing for now */
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       if (error)
+               return error;
+       rudp = xfs_trans_get_rud(tp, ruip, ruip->rui_format.rui_nextents);
+
+       for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
+               rmap = &(ruip->rui_format.rui_extents[i]);
+               state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
+                               XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+               whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
+                               XFS_ATTR_FORK : XFS_DATA_FORK;
+               switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
+               case XFS_RMAP_EXTENT_MAP:
+                       type = XFS_RMAP_MAP;
+                       break;
+               case XFS_RMAP_EXTENT_UNMAP:
+                       type = XFS_RMAP_UNMAP;
+                       break;
+               case XFS_RMAP_EXTENT_CONVERT:
+                       type = XFS_RMAP_CONVERT;
+                       break;
+               case XFS_RMAP_EXTENT_ALLOC:
+                       type = XFS_RMAP_ALLOC;
+                       break;
+               case XFS_RMAP_EXTENT_FREE:
+                       type = XFS_RMAP_FREE;
+                       break;
+               default:
+                       error = -EFSCORRUPTED;
+                       goto abort_error;
+               }
+               error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
+                               rmap->me_owner, whichfork,
+                               rmap->me_startoff, rmap->me_startblock,
+                               rmap->me_len, state, &rcur);
+               if (error)
+                       goto abort_error;
+
+       }
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
        set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
-       xfs_rui_release(ruip);
+       error = xfs_trans_commit(tp);
+       return error;
+
+abort_error:
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+       xfs_trans_cancel(tp);
        return error;
 }
index 07f4550..bb4b84f 100644 (file)
@@ -35,6 +35,7 @@ struct xfs_dquot_acct;
 struct xfs_busy_extent;
 struct xfs_rud_log_item;
 struct xfs_rui_log_item;
+struct xfs_btree_cur;
 
 typedef struct xfs_log_item {
        struct list_head                li_ail;         /* AIL pointers */
@@ -245,6 +246,6 @@ int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
                struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
                __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
                xfs_fsblock_t startblock, xfs_filblks_t blockcount,
-               xfs_exntst_t state);
+               xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
 #endif /* __XFS_TRANS_H__ */
index baab990..8341476 100644 (file)
@@ -171,14 +171,15 @@ xfs_trans_log_finish_rmap_update(
        xfs_fileoff_t                   startoff,
        xfs_fsblock_t                   startblock,
        xfs_filblks_t                   blockcount,
-       xfs_exntst_t                    state)
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
 {
        uint                            next_extent;
        struct xfs_map_extent           *rmap;
        int                             error;
 
-       /* XXX: actually finish the rmap update here */
-       error = -EFSCORRUPTED;
+       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+                       startblock, blockcount, state, pcur);
 
        /*
         * Mark the transaction dirty, even on error. This ensures the
@@ -276,7 +277,8 @@ xfs_rmap_update_finish_item(
                        rmap->ri_bmap.br_startoff,
                        rmap->ri_bmap.br_startblock,
                        rmap->ri_bmap.br_blockcount,
-                       rmap->ri_bmap.br_state);
+                       rmap->ri_bmap.br_state,
+                       (struct xfs_btree_cur **)state);
        kmem_free(rmap);
        return error;
 }
@@ -288,6 +290,9 @@ xfs_rmap_update_finish_cleanup(
        void                    *state,
        int                     error)
 {
+       struct xfs_btree_cur    *rcur = state;
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
 }
 
 /* Abort all pending RUIs. */