#include "xfs_bmap_btree.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
+#include "xfs_rmap_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_bmap_item.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
SHUTDOWN_META_IO_ERROR);
}
}
+
+ /*
+ * On v5 supers, a bli could be attached to update the metadata LSN.
+ * Clean it up.
+ */
+ if (bp->b_fspriv)
+ xfs_buf_item_relse(bp);
+ ASSERT(bp->b_fspriv == NULL);
+
bp->b_iodone = NULL;
xfs_buf_ioend(bp);
}
case XFS_LI_QUOTAOFF:
case XFS_LI_EFD:
case XFS_LI_EFI:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
+ case XFS_LI_BUI:
+ case XFS_LI_BUD:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
list_move_tail(&item->ri_list, &inode_list);
case XFS_ABTC_CRC_MAGIC:
case XFS_ABTB_MAGIC:
case XFS_ABTC_MAGIC:
+ case XFS_RMAP_CRC_MAGIC:
+ case XFS_REFC_CRC_MAGIC:
case XFS_IBT_CRC_MAGIC:
case XFS_IBT_MAGIC: {
struct xfs_btree_block *btb = blk;
xlog_recover_validate_buf_type(
struct xfs_mount *mp,
struct xfs_buf *bp,
- xfs_buf_log_format_t *buf_f)
+ xfs_buf_log_format_t *buf_f,
+ xfs_lsn_t current_lsn)
{
struct xfs_da_blkinfo *info = bp->b_addr;
__uint32_t magic32;
__uint16_t magic16;
__uint16_t magicda;
+ char *warnmsg = NULL;
/*
* We can only do post recovery validation on items on CRC enabled
case XFS_BMAP_MAGIC:
bp->b_ops = &xfs_bmbt_buf_ops;
break;
+ case XFS_RMAP_CRC_MAGIC:
+ bp->b_ops = &xfs_rmapbt_buf_ops;
+ break;
+ case XFS_REFC_CRC_MAGIC:
+ bp->b_ops = &xfs_refcountbt_buf_ops;
+ break;
default:
- xfs_warn(mp, "Bad btree block magic!");
- ASSERT(0);
+ warnmsg = "Bad btree block magic!";
break;
}
break;
case XFS_BLFT_AGF_BUF:
if (magic32 != XFS_AGF_MAGIC) {
- xfs_warn(mp, "Bad AGF block magic!");
- ASSERT(0);
+ warnmsg = "Bad AGF block magic!";
break;
}
bp->b_ops = &xfs_agf_buf_ops;
break;
case XFS_BLFT_AGFL_BUF:
if (magic32 != XFS_AGFL_MAGIC) {
- xfs_warn(mp, "Bad AGFL block magic!");
- ASSERT(0);
+ warnmsg = "Bad AGFL block magic!";
break;
}
bp->b_ops = &xfs_agfl_buf_ops;
break;
case XFS_BLFT_AGI_BUF:
if (magic32 != XFS_AGI_MAGIC) {
- xfs_warn(mp, "Bad AGI block magic!");
- ASSERT(0);
+ warnmsg = "Bad AGI block magic!";
break;
}
bp->b_ops = &xfs_agi_buf_ops;
case XFS_BLFT_GDQUOT_BUF:
#ifdef CONFIG_XFS_QUOTA
if (magic16 != XFS_DQUOT_MAGIC) {
- xfs_warn(mp, "Bad DQUOT block magic!");
- ASSERT(0);
+ warnmsg = "Bad DQUOT block magic!";
break;
}
bp->b_ops = &xfs_dquot_buf_ops;
break;
case XFS_BLFT_DINO_BUF:
if (magic16 != XFS_DINODE_MAGIC) {
- xfs_warn(mp, "Bad INODE block magic!");
- ASSERT(0);
+ warnmsg = "Bad INODE block magic!";
break;
}
bp->b_ops = &xfs_inode_buf_ops;
break;
case XFS_BLFT_SYMLINK_BUF:
if (magic32 != XFS_SYMLINK_MAGIC) {
- xfs_warn(mp, "Bad symlink block magic!");
- ASSERT(0);
+ warnmsg = "Bad symlink block magic!";
break;
}
bp->b_ops = &xfs_symlink_buf_ops;
case XFS_BLFT_DIR_BLOCK_BUF:
if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
magic32 != XFS_DIR3_BLOCK_MAGIC) {
- xfs_warn(mp, "Bad dir block magic!");
- ASSERT(0);
+ warnmsg = "Bad dir block magic!";
break;
}
bp->b_ops = &xfs_dir3_block_buf_ops;
case XFS_BLFT_DIR_DATA_BUF:
if (magic32 != XFS_DIR2_DATA_MAGIC &&
magic32 != XFS_DIR3_DATA_MAGIC) {
- xfs_warn(mp, "Bad dir data magic!");
- ASSERT(0);
+ warnmsg = "Bad dir data magic!";
break;
}
bp->b_ops = &xfs_dir3_data_buf_ops;
case XFS_BLFT_DIR_FREE_BUF:
if (magic32 != XFS_DIR2_FREE_MAGIC &&
magic32 != XFS_DIR3_FREE_MAGIC) {
- xfs_warn(mp, "Bad dir3 free magic!");
- ASSERT(0);
+ warnmsg = "Bad dir3 free magic!";
break;
}
bp->b_ops = &xfs_dir3_free_buf_ops;
case XFS_BLFT_DIR_LEAF1_BUF:
if (magicda != XFS_DIR2_LEAF1_MAGIC &&
magicda != XFS_DIR3_LEAF1_MAGIC) {
- xfs_warn(mp, "Bad dir leaf1 magic!");
- ASSERT(0);
+ warnmsg = "Bad dir leaf1 magic!";
break;
}
bp->b_ops = &xfs_dir3_leaf1_buf_ops;
case XFS_BLFT_DIR_LEAFN_BUF:
if (magicda != XFS_DIR2_LEAFN_MAGIC &&
magicda != XFS_DIR3_LEAFN_MAGIC) {
- xfs_warn(mp, "Bad dir leafn magic!");
- ASSERT(0);
+ warnmsg = "Bad dir leafn magic!";
break;
}
bp->b_ops = &xfs_dir3_leafn_buf_ops;
case XFS_BLFT_DA_NODE_BUF:
if (magicda != XFS_DA_NODE_MAGIC &&
magicda != XFS_DA3_NODE_MAGIC) {
- xfs_warn(mp, "Bad da node magic!");
- ASSERT(0);
+ warnmsg = "Bad da node magic!";
break;
}
bp->b_ops = &xfs_da3_node_buf_ops;
case XFS_BLFT_ATTR_LEAF_BUF:
if (magicda != XFS_ATTR_LEAF_MAGIC &&
magicda != XFS_ATTR3_LEAF_MAGIC) {
- xfs_warn(mp, "Bad attr leaf magic!");
- ASSERT(0);
+ warnmsg = "Bad attr leaf magic!";
break;
}
bp->b_ops = &xfs_attr3_leaf_buf_ops;
break;
case XFS_BLFT_ATTR_RMT_BUF:
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
- xfs_warn(mp, "Bad attr remote magic!");
- ASSERT(0);
+ warnmsg = "Bad attr remote magic!";
break;
}
bp->b_ops = &xfs_attr3_rmt_buf_ops;
break;
case XFS_BLFT_SB_BUF:
if (magic32 != XFS_SB_MAGIC) {
- xfs_warn(mp, "Bad SB block magic!");
- ASSERT(0);
+ warnmsg = "Bad SB block magic!";
break;
}
bp->b_ops = &xfs_sb_buf_ops;
xfs_blft_from_flags(buf_f));
break;
}
+
+ /*
+ * Nothing else to do in the case of a NULL current LSN as this means
+ * the buffer is more recent than the change in the log and will be
+ * skipped.
+ */
+ if (current_lsn == NULLCOMMITLSN)
+ return;
+
+ if (warnmsg) {
+ xfs_warn(mp, warnmsg);
+ ASSERT(0);
+ }
+
+ /*
+ * We must update the metadata LSN of the buffer as it is written out to
+ * ensure that older transactions never replay over this one and corrupt
+ * the buffer. This can occur if log recovery is interrupted at some
+ * point after the current transaction completes, at which point a
+ * subsequent mount starts recovery from the beginning.
+ *
+ * Write verifiers update the metadata LSN from log items attached to
+ * the buffer. Therefore, initialize a bli purely to carry the LSN to
+ * the verifier. We'll clean it up in our ->iodone() callback.
+ */
+ if (bp->b_ops) {
+ struct xfs_buf_log_item *bip;
+
+ ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_item_init(bp, mp);
+ bip = bp->b_fspriv;
+ bip->bli_item.li_lsn = current_lsn;
+ }
}
/*
struct xfs_mount *mp,
xlog_recover_item_t *item,
struct xfs_buf *bp,
- xfs_buf_log_format_t *buf_f)
+ xfs_buf_log_format_t *buf_f,
+ xfs_lsn_t current_lsn)
{
int i;
int bit;
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
}
/*
if (log->l_quotaoffs_flag & type)
return false;
- xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
return true;
}
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ trace_xfs_log_recover_buf_skip(log, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
goto out_release;
}
if (!dirty)
goto out_release;
} else {
- xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
/*
return 0;
}
+/*
+ * This routine is called to create an in-core extent rmap update
+ * item from the rui format structure which was logged on disk.
+ * It allocates an in-core rui, copies the extents from the format
+ * structure into it, and adds the rui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_rui_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_rui_log_format *rui_formatp;
+
+ rui_formatp = item->ri_buf[0].i_addr;
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+ if (error) {
+ xfs_rui_item_free(ruip);
+ return error;
+ }
+ atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+
+ spin_lock(&log->l_ailp->xa_lock);
+ /*
+ * The RUI has two references. One for the RUD and one for RUI to ensure
+ * it makes it into the AIL. Insert the RUI into the AIL directly and
+ * drop the RUI reference. Note that xfs_trans_ail_update() drops the
+ * AIL lock.
+ */
+ xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
+ xfs_rui_release(ruip);
+ return 0;
+}
+
+
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. To do this it searches the AIL for the RUI with an id
+ * equal to that in the RUD format structure. If we find it we drop the RUD
+ * reference, which removes the RUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_rud_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_rud_log_format *rud_formatp;
+ struct xfs_rui_log_item *ruip = NULL;
+ struct xfs_log_item *lip;
+ __uint64_t rui_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ rud_formatp = item->ri_buf[0].i_addr;
+ ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+ rui_id = rud_formatp->rud_rui_id;
+
+ /*
+ * Search for the RUI with the id in the RUD format structure in the
+ * AIL.
+ */
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ while (lip != NULL) {
+ if (lip->li_type == XFS_LI_RUI) {
+ ruip = (struct xfs_rui_log_item *)lip;
+ if (ruip->rui_format.rui_id == rui_id) {
+ /*
+ * Drop the RUD reference to the RUI. This
+ * removes the RUI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->xa_lock);
+ xfs_rui_release(ruip);
+ spin_lock(&ailp->xa_lock);
+ break;
+ }
+ }
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->xa_lock);
+
+ return 0;
+}
+
+/*
+ * Copy an CUI format buffer from the given buf, and into the destination
+ * CUI format structure. The CUI/CUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_cui_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_cui_log_format *dst_cui_fmt)
+{
+ struct xfs_cui_log_format *src_cui_fmt;
+ uint len;
+
+ src_cui_fmt = buf->i_addr;
+ len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+
+ if (buf->i_len == len) {
+ memcpy(dst_cui_fmt, src_cui_fmt, len);
+ return 0;
+ }
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent refcount update
+ * item from the cui format structure which was logged on disk.
+ * It allocates an in-core cui, copies the extents from the format
+ * structure into it, and adds the cui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_cui_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_cui_log_item *cuip;
+ struct xfs_cui_log_format *cui_formatp;
+
+ cui_formatp = item->ri_buf[0].i_addr;
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
+ if (error) {
+ xfs_cui_item_free(cuip);
+ return error;
+ }
+ atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+
+ spin_lock(&log->l_ailp->xa_lock);
+ /*
+ * The CUI has two references. One for the CUD and one for CUI to ensure
+ * it makes it into the AIL. Insert the CUI into the AIL directly and
+ * drop the CUI reference. Note that xfs_trans_ail_update() drops the
+ * AIL lock.
+ */
+ xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
+ xfs_cui_release(cuip);
+ return 0;
+}
+
+
+/*
+ * This routine is called when an CUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding CUI if it
+ * was still in the log. To do this it searches the AIL for the CUI with an id
+ * equal to that in the CUD format structure. If we find it we drop the CUD
+ * reference, which removes the CUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_cud_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_cud_log_format *cud_formatp;
+ struct xfs_cui_log_item *cuip = NULL;
+ struct xfs_log_item *lip;
+ __uint64_t cui_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ cud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format))
+ return -EFSCORRUPTED;
+ cui_id = cud_formatp->cud_cui_id;
+
+ /*
+ * Search for the CUI with the id in the CUD format structure in the
+ * AIL.
+ */
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ while (lip != NULL) {
+ if (lip->li_type == XFS_LI_CUI) {
+ cuip = (struct xfs_cui_log_item *)lip;
+ if (cuip->cui_format.cui_id == cui_id) {
+ /*
+ * Drop the CUD reference to the CUI. This
+ * removes the CUI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->xa_lock);
+ xfs_cui_release(cuip);
+ spin_lock(&ailp->xa_lock);
+ break;
+ }
+ }
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->xa_lock);
+
+ return 0;
+}
+
+/*
+ * Copy an BUI format buffer from the given buf, and into the destination
+ * BUI format structure. The BUI/BUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_bui_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_bui_log_format *dst_bui_fmt)
+{
+ struct xfs_bui_log_format *src_bui_fmt;
+ uint len;
+
+ src_bui_fmt = buf->i_addr;
+ len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+
+ if (buf->i_len == len) {
+ memcpy(dst_bui_fmt, src_bui_fmt, len);
+ return 0;
+ }
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent bmap update
+ * item from the bui format structure which was logged on disk.
+ * It allocates an in-core bui, copies the extents from the format
+ * structure into it, and adds the bui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_bui_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_bui_log_item *buip;
+ struct xfs_bui_log_format *bui_formatp;
+
+ bui_formatp = item->ri_buf[0].i_addr;
+
+ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+ return -EFSCORRUPTED;
+ buip = xfs_bui_init(mp);
+ error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
+ if (error) {
+ xfs_bui_item_free(buip);
+ return error;
+ }
+ atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
+
+ spin_lock(&log->l_ailp->xa_lock);
+ /*
+ * The RUI has two references. One for the RUD and one for RUI to ensure
+ * it makes it into the AIL. Insert the RUI into the AIL directly and
+ * drop the RUI reference. Note that xfs_trans_ail_update() drops the
+ * AIL lock.
+ */
+ xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
+ xfs_bui_release(buip);
+ return 0;
+}
+
+
+/*
+ * This routine is called when an BUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding BUI if it
+ * was still in the log. To do this it searches the AIL for the BUI with an id
+ * equal to that in the BUD format structure. If we find it we drop the BUD
+ * reference, which removes the BUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_bud_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_bud_log_format *bud_formatp;
+ struct xfs_bui_log_item *buip = NULL;
+ struct xfs_log_item *lip;
+ __uint64_t bui_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ bud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format))
+ return -EFSCORRUPTED;
+ bui_id = bud_formatp->bud_bui_id;
+
+ /*
+ * Search for the BUI with the id in the BUD format structure in the
+ * AIL.
+ */
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ while (lip != NULL) {
+ if (lip->li_type == XFS_LI_BUI) {
+ buip = (struct xfs_bui_log_item *)lip;
+ if (buip->bui_format.bui_id == bui_id) {
+ /*
+ * Drop the BUD reference to the BUI. This
+ * removes the BUI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->xa_lock);
+ xfs_bui_release(buip);
+ spin_lock(&ailp->xa_lock);
+ break;
+ }
+ }
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->xa_lock);
+
+ return 0;
+}
+
/*
* This routine is called when an inode create format structure is found in a
* committed transaction in the log. It's purpose is to initialise the inodes
case XFS_LI_EFI:
case XFS_LI_EFD:
case XFS_LI_QUOTAOFF:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
+ case XFS_LI_BUI:
+ case XFS_LI_BUD:
default:
break;
}
case XFS_LI_EFD:
case XFS_LI_DQUOT:
case XFS_LI_ICREATE:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
+ case XFS_LI_BUI:
+ case XFS_LI_BUD:
/* nothing to do in pass 1 */
return 0;
default:
return xlog_recover_efi_pass2(log, item, trans->r_lsn);
case XFS_LI_EFD:
return xlog_recover_efd_pass2(log, item);
+ case XFS_LI_RUI:
+ return xlog_recover_rui_pass2(log, item, trans->r_lsn);
+ case XFS_LI_RUD:
+ return xlog_recover_rud_pass2(log, item);
+ case XFS_LI_CUI:
+ return xlog_recover_cui_pass2(log, item, trans->r_lsn);
+ case XFS_LI_CUD:
+ return xlog_recover_cud_pass2(log, item);
+ case XFS_LI_BUI:
+ return xlog_recover_bui_pass2(log, item, trans->r_lsn);
+ case XFS_LI_BUD:
+ return xlog_recover_bud_pass2(log, item);
case XFS_LI_DQUOT:
return xlog_recover_dquot_pass2(log, buffer_list, item,
trans->r_lsn);
xlog_recover_commit_trans(
struct xlog *log,
struct xlog_recover *trans,
- int pass)
+ int pass,
+ struct list_head *buffer_list)
{
int error = 0;
- int error2;
int items_queued = 0;
struct xlog_recover_item *item;
struct xlog_recover_item *next;
- LIST_HEAD (buffer_list);
LIST_HEAD (ra_list);
LIST_HEAD (done_list);
items_queued++;
if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
error = xlog_recover_items_pass2(log, trans,
- &buffer_list, &ra_list);
+ buffer_list, &ra_list);
list_splice_tail_init(&ra_list, &done_list);
items_queued = 0;
}
if (!list_empty(&ra_list)) {
if (!error)
error = xlog_recover_items_pass2(log, trans,
- &buffer_list, &ra_list);
+ buffer_list, &ra_list);
list_splice_tail_init(&ra_list, &done_list);
}
if (!list_empty(&done_list))
list_splice_init(&done_list, &trans->r_itemq);
- error2 = xfs_buf_delwri_submit(&buffer_list);
- return error ? error : error2;
+ return error;
}
STATIC void
char *dp,
unsigned int len,
unsigned int flags,
- int pass)
+ int pass,
+ struct list_head *buffer_list)
{
int error = 0;
bool freeit = false;
error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
break;
case XLOG_COMMIT_TRANS:
- error = xlog_recover_commit_trans(log, trans, pass);
+ error = xlog_recover_commit_trans(log, trans, pass,
+ buffer_list);
/* success or fail, we are now done with this transaction. */
freeit = true;
break;
struct xlog_op_header *ohead,
char *dp,
char *end,
- int pass)
+ int pass,
+ struct list_head *buffer_list)
{
struct xlog_recover *trans;
unsigned int len;
+ int error;
/* Do we understand who wrote this op? */
if (ohead->oh_clientid != XFS_TRANSACTION &&
return 0;
}
+ /*
+ * The recovered buffer queue is drained only once we know that all
+ * recovery items for the current LSN have been processed. This is
+ * required because:
+ *
+ * - Buffer write submission updates the metadata LSN of the buffer.
+ * - Log recovery skips items with a metadata LSN >= the current LSN of
+ * the recovery item.
+ * - Separate recovery items against the same metadata buffer can share
+ * a current LSN. I.e., consider that the LSN of a recovery item is
+ * defined as the starting LSN of the first record in which its
+ * transaction appears, that a record can hold multiple transactions,
+ * and/or that a transaction can span multiple records.
+ *
+ * In other words, we are allowed to submit a buffer from log recovery
+ * once per current LSN. Otherwise, we may incorrectly skip recovery
+ * items and cause corruption.
+ *
+ * We don't know up front whether buffers are updated multiple times per
+ * LSN. Therefore, track the current LSN of each commit log record as it
+ * is processed and drain the queue when it changes. Use commit records
+ * because they are ordered correctly by the logging code.
+ */
+ if (log->l_recovery_lsn != trans->r_lsn &&
+ ohead->oh_flags & XLOG_COMMIT_TRANS) {
+ error = xfs_buf_delwri_submit(buffer_list);
+ if (error)
+ return error;
+ log->l_recovery_lsn = trans->r_lsn;
+ }
+
return xlog_recovery_process_trans(log, trans, dp, len,
- ohead->oh_flags, pass);
+ ohead->oh_flags, pass, buffer_list);
}
/*
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
char *dp,
- int pass)
+ int pass,
+ struct list_head *buffer_list)
{
struct xlog_op_header *ohead;
char *end;
if (xlog_header_check_recover(log->l_mp, rhead))
return -EIO;
+ trace_xfs_log_recover_record(log, rhead, pass);
while ((dp < end) && num_logops) {
ohead = (struct xlog_op_header *)dp;
/* errors will abort recovery */
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
- dp, end, pass);
+ dp, end, pass, buffer_list);
if (error)
return error;
return 0;
}
-/*
- * Process an extent free intent item that was recovered from
- * the log. We need to free the extents that it describes.
- */
+/* Recover the EFI if necessary. */
STATIC int
xlog_recover_process_efi(
- xfs_mount_t *mp,
- xfs_efi_log_item_t *efip)
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
- xfs_efd_log_item_t *efdp;
- xfs_trans_t *tp;
- int i;
- int error = 0;
- xfs_extent_t *extp;
- xfs_fsblock_t startblock_fsb;
+ struct xfs_efi_log_item *efip;
+ int error;
+
+ /*
+ * Skip EFIs that we've already processed.
+ */
+ efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+ if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
+ return 0;
- ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_efi_recover(mp, efip);
+ spin_lock(&ailp->xa_lock);
+
+ return error;
+}
+
+/* Release the EFI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_efi(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_efi_log_item *efip;
+
+ efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_efi_release(efip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Recover the RUI if necessary. */
+STATIC int
+xlog_recover_process_rui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_rui_log_item *ruip;
+ int error;
/*
- * First check the validity of the extents described by the
- * EFI. If any are bad, then assume that all are bad and
- * just toss the EFI.
+ * Skip RUIs that we've already processed.
*/
- for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- extp = &(efip->efi_format.efi_extents[i]);
- startblock_fsb = XFS_BB_TO_FSB(mp,
- XFS_FSB_TO_DADDR(mp, extp->ext_start));
- if ((startblock_fsb == 0) ||
- (extp->ext_len == 0) ||
- (startblock_fsb >= mp->m_sb.sb_dblocks) ||
- (extp->ext_len >= mp->m_sb.sb_agblocks)) {
- /*
- * This will pull the EFI from the AIL and
- * free the memory associated with it.
- */
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- xfs_efi_release(efip);
- return -EIO;
- }
- }
+ ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+ if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
+ return 0;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
- if (error)
- return error;
- efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_rui_recover(mp, ruip);
+ spin_lock(&ailp->xa_lock);
- for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- extp = &(efip->efi_format.efi_extents[i]);
- error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
- extp->ext_len);
- if (error)
- goto abort_error;
+ return error;
+}
- }
+/* Release the RUI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_rui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_rui_log_item *ruip;
+
+ ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_rui_release(ruip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Recover the CUI if necessary. */
+STATIC int
+xlog_recover_process_cui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_cui_log_item *cuip;
+ int error;
+
+ /*
+ * Skip CUIs that we've already processed.
+ */
+ cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
+ if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
+ return 0;
+
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_cui_recover(mp, cuip);
+ spin_lock(&ailp->xa_lock);
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- error = xfs_trans_commit(tp);
return error;
+}
+
+/* Release the CUI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_cui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_cui_log_item *cuip;
+
+ cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_cui_release(cuip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Recover the BUI if necessary. */
+STATIC int
+xlog_recover_process_bui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_bui_log_item *buip;
+ int error;
+
+ /*
+ * Skip BUIs that we've already processed.
+ */
+ buip = container_of(lip, struct xfs_bui_log_item, bui_item);
+ if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
+ return 0;
+
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_bui_recover(mp, buip);
+ spin_lock(&ailp->xa_lock);
-abort_error:
- xfs_trans_cancel(tp);
return error;
}
+/* Release the BUI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_bui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_bui_log_item *buip;
+
+ buip = container_of(lip, struct xfs_bui_log_item, bui_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_bui_release(buip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Is this log item a deferred action intent? */
+static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+{
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ case XFS_LI_RUI:
+ case XFS_LI_CUI:
+ case XFS_LI_BUI:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
- * When this is called, all of the EFIs which did not have
- * corresponding EFDs should be in the AIL. What we do now
- * is free the extents associated with each one.
+ * When this is called, all of the log intent items which did not have
+ * corresponding log done items should be in the AIL. What we do now
+ * is update the data structures associated with each one.
*
- * Since we process the EFIs in normal transactions, they
- * will be removed at some point after the commit. This prevents
- * us from just walking down the list processing each one.
- * We'll use a flag in the EFI to skip those that we've already
- * processed and use the AIL iteration mechanism's generation
- * count to try to speed this up at least a bit.
+ * Since we process the log intent items in normal transactions, they
+ * will be removed at some point after the commit. This prevents us
+ * from just walking down the list processing each one. We'll use a
+ * flag in the intent item to skip those that we've already processed
+ * and use the AIL iteration mechanism's generation count to try to
+ * speed this up at least a bit.
*
- * When we start, we know that the EFIs are the only things in
- * the AIL. As we process them, however, other items are added
- * to the AIL. Since everything added to the AIL must come after
- * everything already in the AIL, we stop processing as soon as
- * we see something other than an EFI in the AIL.
+ * When we start, we know that the intents are the only things in the
+ * AIL. As we process them, however, other items are added to the
+ * AIL.
*/
STATIC int
-xlog_recover_process_efis(
+xlog_recover_process_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- struct xfs_efi_log_item *efip;
int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
+ xfs_lsn_t last_lsn;
ailp = log->l_ailp;
spin_lock(&ailp->xa_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
while (lip != NULL) {
/*
- * We're done when we see something other than an EFI.
- * There should be no EFIs left in the AIL now.
+ * We're done when we see something other than an intent.
+ * There should be no intents left in the AIL now.
*/
- if (lip->li_type != XFS_LI_EFI) {
+ if (!xlog_item_is_intent(lip)) {
#ifdef DEBUG
for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
- ASSERT(lip->li_type != XFS_LI_EFI);
+ ASSERT(!xlog_item_is_intent(lip));
#endif
break;
}
/*
- * Skip EFIs that we've already processed.
+ * We should never see a redo item with a LSN higher than
+ * the last transaction we found in the log at the start
+ * of recovery.
*/
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
- if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- continue;
- }
+ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
- spin_unlock(&ailp->xa_lock);
- error = xlog_recover_process_efi(log->l_mp, efip);
- spin_lock(&ailp->xa_lock);
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ error = xlog_recover_process_efi(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_RUI:
+ error = xlog_recover_process_rui(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_CUI:
+ error = xlog_recover_process_cui(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_BUI:
+ error = xlog_recover_process_bui(log->l_mp, ailp, lip);
+ break;
+ }
if (error)
goto out;
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
/*
- * A cancel occurs when the mount has failed and we're bailing out. Release all
- * pending EFIs so they don't pin the AIL.
+ * A cancel occurs when the mount has failed and we're bailing out.
+ * Release all pending log intent items so they don't pin the AIL.
*/
STATIC int
-xlog_recover_cancel_efis(
+xlog_recover_cancel_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- struct xfs_efi_log_item *efip;
int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
- * We're done when we see something other than an EFI.
- * There should be no EFIs left in the AIL now.
+ * We're done when we see something other than an intent.
+ * There should be no intents left in the AIL now.
*/
- if (lip->li_type != XFS_LI_EFI) {
+ if (!xlog_item_is_intent(lip)) {
#ifdef DEBUG
for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
- ASSERT(lip->li_type != XFS_LI_EFI);
+ ASSERT(!xlog_item_is_intent(lip));
#endif
break;
}
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-
- spin_unlock(&ailp->xa_lock);
- xfs_efi_release(efip);
- spin_lock(&ailp->xa_lock);
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ xlog_recover_cancel_efi(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_RUI:
+ xlog_recover_cancel_rui(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_CUI:
+ xlog_recover_cancel_cui(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_BUI:
+ xlog_recover_cancel_bui(log->l_mp, ailp, lip);
+ break;
+ }
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
if (error)
goto fail_iput;
+ xfs_iflags_clear(ip, XFS_IRECOVERY);
ASSERT(VFS_I(ip)->i_nlink == 0);
ASSERT(VFS_I(ip)->i_mode != 0);
struct hlist_head rhash[],
struct xlog_rec_header *rhead,
char *dp,
- int pass)
+ int pass,
+ struct list_head *buffer_list)
{
int error;
__le32 crc;
if (error)
return error;
- return xlog_recover_process_data(log, rhash, rhead, dp, pass);
+ return xlog_recover_process_data(log, rhash, rhead, dp, pass,
+ buffer_list);
}
STATIC int
char *offset;
xfs_buf_t *hbp, *dbp;
int error = 0, h_size, h_len;
+ int error2 = 0;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
struct hlist_head rhash[XLOG_RHASH_SIZE];
+ LIST_HEAD (buffer_list);
ASSERT(head_blk != tail_blk);
rhead_blk = 0;
}
error = xlog_recover_process(log, rhash, rhead, offset,
- pass);
+ pass, &buffer_list);
if (error)
goto bread_err2;
if (error)
goto bread_err2;
- error = xlog_recover_process(log, rhash, rhead, offset, pass);
+ error = xlog_recover_process(log, rhash, rhead, offset, pass,
+ &buffer_list);
if (error)
goto bread_err2;
bread_err1:
xlog_put_bp(hbp);
+ /*
+ * Submit buffers that have been added from the last record processed,
+ * regardless of error status.
+ */
+ if (!list_empty(&buffer_list))
+ error2 = xfs_buf_delwri_submit(&buffer_list);
+
if (error && first_bad)
*first_bad = rhead_blk;
- return error;
+ return error ? error : error2;
}
/*
xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
return error;
}
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
xlog_recover_check_summary(log);
*/
if (log->l_flags & XLOG_RECOVERY_NEEDED) {
int error;
- error = xlog_recover_process_efis(log);
+ error = xlog_recover_process_intents(log);
if (error) {
- xfs_alert(log->l_mp, "Failed to recover EFIs");
+ xfs_alert(log->l_mp, "Failed to recover intents");
return error;
}
+
/*
- * Sync the log to get all the EFIs out of the AIL.
+ * Sync the log to get all the intents out of the AIL.
* This isn't absolutely necessary, but it helps in
* case the unlink transactions would have problems
- * pushing the EFIs out of the way.
+ * pushing the intents out of the way.
*/
xfs_log_force(log->l_mp, XFS_LOG_SYNC);
int error = 0;
if (log->l_flags & XLOG_RECOVERY_NEEDED)
- error = xlog_recover_cancel_efis(log);
+ error = xlog_recover_cancel_intents(log);
return error;
}