fs/xfs/libxfs/xfs_bmap_btree.c

   1 /*
   2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include "xfs.h"
  19 #include "xfs_fs.h"
  20 #include "xfs_shared.h"
  21 #include "xfs_format.h"
  22 #include "xfs_log_format.h"
  23 #include "xfs_trans_resv.h"
  24 #include "xfs_bit.h"
  25 #include "xfs_mount.h"
  26 #include "xfs_defer.h"
  27 #include "xfs_inode.h"
  28 #include "xfs_trans.h"
  29 #include "xfs_inode_item.h"
  30 #include "xfs_alloc.h"
  31 #include "xfs_btree.h"
  32 #include "xfs_bmap_btree.h"
  33 #include "xfs_bmap.h"
  34 #include "xfs_error.h"
  35 #include "xfs_quota.h"
  36 #include "xfs_trace.h"
  37 #include "xfs_cksum.h"
  38 #include "xfs_rmap.h"
  39
  40 /*
  41  * Determine the extent state.
  42  */
  43 /* ARGSUSED */
  44 STATIC xfs_exntst_t
  45 xfs_extent_state(
  46         xfs_filblks_t           blks,
  47         int                     extent_flag)
  48 {
  49         if (extent_flag) {
  50                 ASSERT(blks != 0);      /* saved for DMIG */
  51                 return XFS_EXT_UNWRITTEN;
  52         }
  53         return XFS_EXT_NORM;
  54 }
  55
  56 /*
  57  * Convert on-disk form of btree root to in-memory form.
  58  */
  59 void
  60 xfs_bmdr_to_bmbt(
  61         struct xfs_inode        *ip,
  62         xfs_bmdr_block_t        *dblock,
  63         int                     dblocklen,
  64         struct xfs_btree_block  *rblock,
  65         int                     rblocklen)
  66 {
  67         struct xfs_mount        *mp = ip->i_mount;
  68         int                     dmxr;
  69         xfs_bmbt_key_t          *fkp;
  70         __be64                  *fpp;
  71         xfs_bmbt_key_t          *tkp;
  72         __be64                  *tpp;
  73
  74         if (xfs_sb_version_hascrc(&mp->m_sb))
  75                 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
  76                                  XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
  77                                  XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
  78         else
  79                 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
  80                                  XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
  81                                  XFS_BTREE_LONG_PTRS);
  82
  83         rblock->bb_level = dblock->bb_level;
  84         ASSERT(be16_to_cpu(rblock->bb_level) > 0);
  85         rblock->bb_numrecs = dblock->bb_numrecs;
  86         dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
  87         fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
  88         tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
  89         fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
  90         tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
  91         dmxr = be16_to_cpu(dblock->bb_numrecs);
  92         memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
  93         memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
  94 }
  95
  96 /*
  97  * Convert a compressed bmap extent record to an uncompressed form.
  98  * This code must be in sync with the routines xfs_bmbt_get_startoff,
  99  * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
 100  */
 101 STATIC void
 102 __xfs_bmbt_get_all(
 103                 __uint64_t l0,
 104                 __uint64_t l1,
 105                 xfs_bmbt_irec_t *s)
 106 {
 107         int     ext_flag;
 108         xfs_exntst_t st;
 109
 110         ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
 111         s->br_startoff = ((xfs_fileoff_t)l0 &
 112                            xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
 113         s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
 114                            (((xfs_fsblock_t)l1) >> 21);
 115         s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
 116         /* This is xfs_extent_state() in-line */
 117         if (ext_flag) {
 118                 ASSERT(s->br_blockcount != 0);  /* saved for DMIG */
 119                 st = XFS_EXT_UNWRITTEN;
 120         } else
 121                 st = XFS_EXT_NORM;
 122         s->br_state = st;
 123 }
 124
 125 void
 126 xfs_bmbt_get_all(
 127         xfs_bmbt_rec_host_t *r,
 128         xfs_bmbt_irec_t *s)
 129 {
 130         __xfs_bmbt_get_all(r->l0, r->l1, s);
 131 }
 132
 133 /*
 134  * Extract the blockcount field from an in memory bmap extent record.
 135  */
 136 xfs_filblks_t
 137 xfs_bmbt_get_blockcount(
 138         xfs_bmbt_rec_host_t     *r)
 139 {
 140         return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
 141 }
 142
 143 /*
 144  * Extract the startblock field from an in memory bmap extent record.
 145  */
 146 xfs_fsblock_t
 147 xfs_bmbt_get_startblock(
 148         xfs_bmbt_rec_host_t     *r)
 149 {
 150         return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
 151                (((xfs_fsblock_t)r->l1) >> 21);
 152 }
 153
 154 /*
 155  * Extract the startoff field from an in memory bmap extent record.
 156  */
 157 xfs_fileoff_t
 158 xfs_bmbt_get_startoff(
 159         xfs_bmbt_rec_host_t     *r)
 160 {
 161         return ((xfs_fileoff_t)r->l0 &
 162                  xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
 163 }
 164
 165 xfs_exntst_t
 166 xfs_bmbt_get_state(
 167         xfs_bmbt_rec_host_t     *r)
 168 {
 169         int     ext_flag;
 170
 171         ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
 172         return xfs_extent_state(xfs_bmbt_get_blockcount(r),
 173                                 ext_flag);
 174 }
 175
 176 /*
 177  * Extract the blockcount field from an on disk bmap extent record.
 178  */
 179 xfs_filblks_t
 180 xfs_bmbt_disk_get_blockcount(
 181         xfs_bmbt_rec_t  *r)
 182 {
 183         return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
 184 }
 185
 186 /*
 187  * Extract the startoff field from a disk format bmap extent record.
 188  */
 189 xfs_fileoff_t
 190 xfs_bmbt_disk_get_startoff(
 191         xfs_bmbt_rec_t  *r)
 192 {
 193         return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
 194                  xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
 195 }
 196
 197
 198 /*
 199  * Set all the fields in a bmap extent record from the arguments.
 200  */
 201 void
 202 xfs_bmbt_set_allf(
 203         xfs_bmbt_rec_host_t     *r,
 204         xfs_fileoff_t           startoff,
 205         xfs_fsblock_t           startblock,
 206         xfs_filblks_t           blockcount,
 207         xfs_exntst_t            state)
 208 {
 209         int             extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
 210
 211         ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
 212         ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
 213         ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
 214
 215         ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
 216
 217         r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
 218                 ((xfs_bmbt_rec_base_t)startoff << 9) |
 219                 ((xfs_bmbt_rec_base_t)startblock >> 43);
 220         r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
 221                 ((xfs_bmbt_rec_base_t)blockcount &
 222                 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
 223 }
 224
 225 /*
 226  * Set all the fields in a bmap extent record from the uncompressed form.
 227  */
 228 void
 229 xfs_bmbt_set_all(
 230         xfs_bmbt_rec_host_t *r,
 231         xfs_bmbt_irec_t *s)
 232 {
 233         xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
 234                              s->br_blockcount, s->br_state);
 235 }
 236
 237
 238 /*
 239  * Set all the fields in a disk format bmap extent record from the arguments.
 240  */
 241 void
 242 xfs_bmbt_disk_set_allf(
 243         xfs_bmbt_rec_t          *r,
 244         xfs_fileoff_t           startoff,
 245         xfs_fsblock_t           startblock,
 246         xfs_filblks_t           blockcount,
 247         xfs_exntst_t            state)
 248 {
 249         int                     extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
 250
 251         ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
 252         ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
 253         ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
 254         ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
 255
 256         r->l0 = cpu_to_be64(
 257                 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
 258                  ((xfs_bmbt_rec_base_t)startoff << 9) |
 259                  ((xfs_bmbt_rec_base_t)startblock >> 43));
 260         r->l1 = cpu_to_be64(
 261                 ((xfs_bmbt_rec_base_t)startblock << 21) |
 262                  ((xfs_bmbt_rec_base_t)blockcount &
 263                   (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
 264 }
 265
 266 /*
 267  * Set all the fields in a bmap extent record from the uncompressed form.
 268  */
 269 STATIC void
 270 xfs_bmbt_disk_set_all(
 271         xfs_bmbt_rec_t  *r,
 272         xfs_bmbt_irec_t *s)
 273 {
 274         xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
 275                                   s->br_blockcount, s->br_state);
 276 }
 277
 278 /*
 279  * Set the blockcount field in a bmap extent record.
 280  */
 281 void
 282 xfs_bmbt_set_blockcount(
 283         xfs_bmbt_rec_host_t *r,
 284         xfs_filblks_t   v)
 285 {
 286         ASSERT((v & xfs_mask64hi(43)) == 0);
 287         r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
 288                   (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
 289 }
 290
 291 /*
 292  * Set the startblock field in a bmap extent record.
 293  */
 294 void
 295 xfs_bmbt_set_startblock(
 296         xfs_bmbt_rec_host_t *r,
 297         xfs_fsblock_t   v)
 298 {
 299         ASSERT((v & xfs_mask64hi(12)) == 0);
 300         r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
 301                   (xfs_bmbt_rec_base_t)(v >> 43);
 302         r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
 303                   (xfs_bmbt_rec_base_t)(v << 21);
 304 }
 305
 306 /*
 307  * Set the startoff field in a bmap extent record.
 308  */
 309 void
 310 xfs_bmbt_set_startoff(
 311         xfs_bmbt_rec_host_t *r,
 312         xfs_fileoff_t   v)
 313 {
 314         ASSERT((v & xfs_mask64hi(9)) == 0);
 315         r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
 316                 ((xfs_bmbt_rec_base_t)v << 9) |
 317                   (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
 318 }
 319
 320 /*
 321  * Set the extent state field in a bmap extent record.
 322  */
 323 void
 324 xfs_bmbt_set_state(
 325         xfs_bmbt_rec_host_t *r,
 326         xfs_exntst_t    v)
 327 {
 328         ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
 329         if (v == XFS_EXT_NORM)
 330                 r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
 331         else
 332                 r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
 333 }
 334
 335 /*
 336  * Convert in-memory form of btree root to on-disk form.
 337  */
 338 void
 339 xfs_bmbt_to_bmdr(
 340         struct xfs_mount        *mp,
 341         struct xfs_btree_block  *rblock,
 342         int                     rblocklen,
 343         xfs_bmdr_block_t        *dblock,
 344         int                     dblocklen)
 345 {
 346         int                     dmxr;
 347         xfs_bmbt_key_t          *fkp;
 348         __be64                  *fpp;
 349         xfs_bmbt_key_t          *tkp;
 350         __be64                  *tpp;
 351
 352         if (xfs_sb_version_hascrc(&mp->m_sb)) {
 353                 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
 354                 ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
 355                        &mp->m_sb.sb_meta_uuid));
 356                 ASSERT(rblock->bb_u.l.bb_blkno ==
 357                        cpu_to_be64(XFS_BUF_DADDR_NULL));
 358         } else
 359                 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
 360         ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
 361         ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
 362         ASSERT(rblock->bb_level != 0);
 363         dblock->bb_level = rblock->bb_level;
 364         dblock->bb_numrecs = rblock->bb_numrecs;
 365         dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
 366         fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
 367         tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
 368         fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
 369         tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
 370         dmxr = be16_to_cpu(dblock->bb_numrecs);
 371         memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
 372         memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
 373 }
 374
 375 /*
 376  * Check extent records, which have just been read, for
 377  * any bit in the extent flag field. ASSERT on debug
 378  * kernels, as this condition should not occur.
 379  * Return an error condition (1) if any flags found,
 380  * otherwise return 0.
 381  */
 382
 383 int
 384 xfs_check_nostate_extents(
 385         xfs_ifork_t             *ifp,
 386         xfs_extnum_t            idx,
 387         xfs_extnum_t            num)
 388 {
 389         for (; num > 0; num--, idx++) {
 390                 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
 391                 if ((ep->l0 >>
 392                      (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
 393                         ASSERT(0);
 394                         return 1;
 395                 }
 396         }
 397         return 0;
 398 }
 399
 400
 401 STATIC struct xfs_btree_cur *
 402 xfs_bmbt_dup_cursor(
 403         struct xfs_btree_cur    *cur)
 404 {
 405         struct xfs_btree_cur    *new;
 406
 407         new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
 408                         cur->bc_private.b.ip, cur->bc_private.b.whichfork);
 409
 410         /*
 411          * Copy the firstblock, dfops, and flags values,
 412          * since init cursor doesn't get them.
 413          */
 414         new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
 415         new->bc_private.b.dfops = cur->bc_private.b.dfops;
 416         new->bc_private.b.flags = cur->bc_private.b.flags;
 417
 418         return new;
 419 }
 420
 421 STATIC void
 422 xfs_bmbt_update_cursor(
 423         struct xfs_btree_cur    *src,
 424         struct xfs_btree_cur    *dst)
 425 {
 426         ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
 427                (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
 428         ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
 429
 430         dst->bc_private.b.allocated += src->bc_private.b.allocated;
 431         dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
 432
 433         src->bc_private.b.allocated = 0;
 434 }
 435
 436 STATIC int
 437 xfs_bmbt_alloc_block(
 438         struct xfs_btree_cur    *cur,
 439         union xfs_btree_ptr     *start,
 440         union xfs_btree_ptr     *new,
 441         int                     *stat)
 442 {
 443         xfs_alloc_arg_t         args;           /* block allocation args */
 444         int                     error;          /* error return value */
 445
 446         memset(&args, 0, sizeof(args));
 447         args.tp = cur->bc_tp;
 448         args.mp = cur->bc_mp;
 449         args.fsbno = cur->bc_private.b.firstblock;
 450         args.firstblock = args.fsbno;
 451         xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
 452                         cur->bc_private.b.whichfork);
 453
 454         if (args.fsbno == NULLFSBLOCK) {
 455                 args.fsbno = be64_to_cpu(start->l);
 456 try_another_ag:
 457                 args.type = XFS_ALLOCTYPE_START_BNO;
 458                 /*
 459                  * Make sure there is sufficient room left in the AG to
 460                  * complete a full tree split for an extent insert.  If
 461                  * we are converting the middle part of an extent then
 462                  * we may need space for two tree splits.
 463                  *
 464                  * We are relying on the caller to make the correct block
 465                  * reservation for this operation to succeed.  If the
 466                  * reservation amount is insufficient then we may fail a
 467                  * block allocation here and corrupt the filesystem.
 468                  */
 469                 args.minleft = args.tp->t_blk_res;
 470         } else if (cur->bc_private.b.dfops->dop_low) {
 471                 args.type = XFS_ALLOCTYPE_START_BNO;
 472         } else {
 473                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
 474         }
 475
 476         args.minlen = args.maxlen = args.prod = 1;
 477         args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
 478         if (!args.wasdel && args.tp->t_blk_res == 0) {
 479                 error = -ENOSPC;
 480                 goto error0;
 481         }
 482         error = xfs_alloc_vextent(&args);
 483         if (error)
 484                 goto error0;
 485
 486         /*
 487          * During a CoW operation, the allocation and bmbt updates occur in
 488          * different transactions.  The mapping code tries to put new bmbt
 489          * blocks near extents being mapped, but the only way to guarantee this
 490          * is if the alloc and the mapping happen in a single transaction that
 491          * has a block reservation.  That isn't the case here, so if we run out
 492          * of space we'll try again with another AG.
 493          */
 494         if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
 495             args.fsbno == NULLFSBLOCK &&
 496             args.type == XFS_ALLOCTYPE_NEAR_BNO) {
 497                 cur->bc_private.b.dfops->dop_low = true;
 498                 args.fsbno = cur->bc_private.b.firstblock;
 499                 goto try_another_ag;
 500         }
 501
 502         if (args.fsbno == NULLFSBLOCK && args.minleft) {
 503                 /*
 504                  * Could not find an AG with enough free space to satisfy
 505                  * a full btree split.  Try again without minleft and if
 506                  * successful activate the lowspace algorithm.
 507                  */
 508                 args.fsbno = 0;
 509                 args.type = XFS_ALLOCTYPE_FIRST_AG;
 510                 args.minleft = 0;
 511                 error = xfs_alloc_vextent(&args);
 512                 if (error)
 513                         goto error0;
 514                 cur->bc_private.b.dfops->dop_low = true;
 515         }
 516         if (args.fsbno == NULLFSBLOCK) {
 517                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 518                 *stat = 0;
 519                 return 0;
 520         }
 521         ASSERT(args.len == 1);
 522         cur->bc_private.b.firstblock = args.fsbno;
 523         cur->bc_private.b.allocated++;
 524         cur->bc_private.b.ip->i_d.di_nblocks++;
 525         xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
 526         xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
 527                         XFS_TRANS_DQ_BCOUNT, 1L);
 528
 529         new->l = cpu_to_be64(args.fsbno);
 530
 531         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 532         *stat = 1;
 533         return 0;
 534
 535  error0:
 536         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 537         return error;
 538 }
 539
 540 STATIC int
 541 xfs_bmbt_free_block(
 542         struct xfs_btree_cur    *cur,
 543         struct xfs_buf          *bp)
 544 {
 545         struct xfs_mount        *mp = cur->bc_mp;
 546         struct xfs_inode        *ip = cur->bc_private.b.ip;
 547         struct xfs_trans        *tp = cur->bc_tp;
 548         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
 549         struct xfs_owner_info   oinfo;
 550
 551         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
 552         xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
 553         ip->i_d.di_nblocks--;
 554
 555         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 556         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 557         return 0;
 558 }
 559
 560 STATIC int
 561 xfs_bmbt_get_minrecs(
 562         struct xfs_btree_cur    *cur,
 563         int                     level)
 564 {
 565         if (level == cur->bc_nlevels - 1) {
 566                 struct xfs_ifork        *ifp;
 567
 568                 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
 569                                     cur->bc_private.b.whichfork);
 570
 571                 return xfs_bmbt_maxrecs(cur->bc_mp,
 572                                         ifp->if_broot_bytes, level == 0) / 2;
 573         }
 574
 575         return cur->bc_mp->m_bmap_dmnr[level != 0];
 576 }
 577
 578 int
 579 xfs_bmbt_get_maxrecs(
 580         struct xfs_btree_cur    *cur,
 581         int                     level)
 582 {
 583         if (level == cur->bc_nlevels - 1) {
 584                 struct xfs_ifork        *ifp;
 585
 586                 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
 587                                     cur->bc_private.b.whichfork);
 588
 589                 return xfs_bmbt_maxrecs(cur->bc_mp,
 590                                         ifp->if_broot_bytes, level == 0);
 591         }
 592
 593         return cur->bc_mp->m_bmap_dmxr[level != 0];
 594
 595 }
 596
 597 /*
 598  * Get the maximum records we could store in the on-disk format.
 599  *
 600  * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
 601  * for the root node this checks the available space in the dinode fork
 602  * so that we can resize the in-memory buffer to match it.  After a
 603  * resize to the maximum size this function returns the same value
 604  * as xfs_bmbt_get_maxrecs for the root node, too.
 605  */
 606 STATIC int
 607 xfs_bmbt_get_dmaxrecs(
 608         struct xfs_btree_cur    *cur,
 609         int                     level)
 610 {
 611         if (level != cur->bc_nlevels - 1)
 612                 return cur->bc_mp->m_bmap_dmxr[level != 0];
 613         return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
 614 }
 615
 616 STATIC void
 617 xfs_bmbt_init_key_from_rec(
 618         union xfs_btree_key     *key,
 619         union xfs_btree_rec     *rec)
 620 {
 621         key->bmbt.br_startoff =
 622                 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
 623 }
 624
 625 STATIC void
 626 xfs_bmbt_init_rec_from_cur(
 627         struct xfs_btree_cur    *cur,
 628         union xfs_btree_rec     *rec)
 629 {
 630         xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
 631 }
 632
 633 STATIC void
 634 xfs_bmbt_init_ptr_from_cur(
 635         struct xfs_btree_cur    *cur,
 636         union xfs_btree_ptr     *ptr)
 637 {
 638         ptr->l = 0;
 639 }
 640
 641 STATIC __int64_t
 642 xfs_bmbt_key_diff(
 643         struct xfs_btree_cur    *cur,
 644         union xfs_btree_key     *key)
 645 {
 646         return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
 647                                       cur->bc_rec.b.br_startoff;
 648 }
 649
 650 static bool
 651 xfs_bmbt_verify(
 652         struct xfs_buf          *bp)
 653 {
 654         struct xfs_mount        *mp = bp->b_target->bt_mount;
 655         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 656         unsigned int            level;
 657
 658         switch (block->bb_magic) {
 659         case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
 660                 if (!xfs_sb_version_hascrc(&mp->m_sb))
 661                         return false;
 662                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
 663                         return false;
 664                 if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
 665                         return false;
 666                 /*
 667                  * XXX: need a better way of verifying the owner here. Right now
 668                  * just make sure there has been one set.
 669                  */
 670                 if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
 671                         return false;
 672                 /* fall through */
 673         case cpu_to_be32(XFS_BMAP_MAGIC):
 674                 break;
 675         default:
 676                 return false;
 677         }
 678
 679         /*
 680          * numrecs and level verification.
 681          *
 682          * We don't know what fork we belong to, so just verify that the level
 683          * is less than the maximum of the two. Later checks will be more
 684          * precise.
 685          */
 686         level = be16_to_cpu(block->bb_level);
 687         if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
 688                 return false;
 689         if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
 690                 return false;
 691
 692         /* sibling pointer verification */
 693         if (!block->bb_u.l.bb_leftsib ||
 694             (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
 695              !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
 696                 return false;
 697         if (!block->bb_u.l.bb_rightsib ||
 698             (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
 699              !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
 700                 return false;
 701
 702         return true;
 703 }
 704
 705 static void
 706 xfs_bmbt_read_verify(
 707         struct xfs_buf  *bp)
 708 {
 709         if (!xfs_btree_lblock_verify_crc(bp))
 710                 xfs_buf_ioerror(bp, -EFSBADCRC);
 711         else if (!xfs_bmbt_verify(bp))
 712                 xfs_buf_ioerror(bp, -EFSCORRUPTED);
 713
 714         if (bp->b_error) {
 715                 trace_xfs_btree_corrupt(bp, _RET_IP_);
 716                 xfs_verifier_error(bp);
 717         }
 718 }
 719
 720 static void
 721 xfs_bmbt_write_verify(
 722         struct xfs_buf  *bp)
 723 {
 724         if (!xfs_bmbt_verify(bp)) {
 725                 trace_xfs_btree_corrupt(bp, _RET_IP_);
 726                 xfs_buf_ioerror(bp, -EFSCORRUPTED);
 727                 xfs_verifier_error(bp);
 728                 return;
 729         }
 730         xfs_btree_lblock_calc_crc(bp);
 731 }
 732
 733 const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 734         .name = "xfs_bmbt",
 735         .verify_read = xfs_bmbt_read_verify,
 736         .verify_write = xfs_bmbt_write_verify,
 737 };
 738
 739
 740 #if defined(DEBUG) || defined(XFS_WARN)
 741 STATIC int
 742 xfs_bmbt_keys_inorder(
 743         struct xfs_btree_cur    *cur,
 744         union xfs_btree_key     *k1,
 745         union xfs_btree_key     *k2)
 746 {
 747         return be64_to_cpu(k1->bmbt.br_startoff) <
 748                 be64_to_cpu(k2->bmbt.br_startoff);
 749 }
 750
 751 STATIC int
 752 xfs_bmbt_recs_inorder(
 753         struct xfs_btree_cur    *cur,
 754         union xfs_btree_rec     *r1,
 755         union xfs_btree_rec     *r2)
 756 {
 757         return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
 758                 xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
 759                 xfs_bmbt_disk_get_startoff(&r2->bmbt);
 760 }
 761 #endif  /* DEBUG */
 762
 763 static const struct xfs_btree_ops xfs_bmbt_ops = {
 764         .rec_len                = sizeof(xfs_bmbt_rec_t),
 765         .key_len                = sizeof(xfs_bmbt_key_t),
 766
 767         .dup_cursor             = xfs_bmbt_dup_cursor,
 768         .update_cursor          = xfs_bmbt_update_cursor,
 769         .alloc_block            = xfs_bmbt_alloc_block,
 770         .free_block             = xfs_bmbt_free_block,
 771         .get_maxrecs            = xfs_bmbt_get_maxrecs,
 772         .get_minrecs            = xfs_bmbt_get_minrecs,
 773         .get_dmaxrecs           = xfs_bmbt_get_dmaxrecs,
 774         .init_key_from_rec      = xfs_bmbt_init_key_from_rec,
 775         .init_rec_from_cur      = xfs_bmbt_init_rec_from_cur,
 776         .init_ptr_from_cur      = xfs_bmbt_init_ptr_from_cur,
 777         .key_diff               = xfs_bmbt_key_diff,
 778         .buf_ops                = &xfs_bmbt_buf_ops,
 779 #if defined(DEBUG) || defined(XFS_WARN)
 780         .keys_inorder           = xfs_bmbt_keys_inorder,
 781         .recs_inorder           = xfs_bmbt_recs_inorder,
 782 #endif
 783 };
 784
 785 /*
 786  * Allocate a new bmap btree cursor.
 787  */
 788 struct xfs_btree_cur *                          /* new bmap btree cursor */
 789 xfs_bmbt_init_cursor(
 790         struct xfs_mount        *mp,            /* file system mount point */
 791         struct xfs_trans        *tp,            /* transaction pointer */
 792         struct xfs_inode        *ip,            /* inode owning the btree */
 793         int                     whichfork)      /* data or attr fork */
 794 {
 795         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 796         struct xfs_btree_cur    *cur;
 797         ASSERT(whichfork != XFS_COW_FORK);
 798
 799         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
 800
 801         cur->bc_tp = tp;
 802         cur->bc_mp = mp;
 803         cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
 804         cur->bc_btnum = XFS_BTNUM_BMAP;
 805         cur->bc_blocklog = mp->m_sb.sb_blocklog;
 806
 807         cur->bc_ops = &xfs_bmbt_ops;
 808         cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
 809         if (xfs_sb_version_hascrc(&mp->m_sb))
 810                 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 811
 812         cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
 813         cur->bc_private.b.ip = ip;
 814         cur->bc_private.b.firstblock = NULLFSBLOCK;
 815         cur->bc_private.b.dfops = NULL;
 816         cur->bc_private.b.allocated = 0;
 817         cur->bc_private.b.flags = 0;
 818         cur->bc_private.b.whichfork = whichfork;
 819
 820         return cur;
 821 }
 822
 823 /*
 824  * Calculate number of records in a bmap btree block.
 825  */
 826 int
 827 xfs_bmbt_maxrecs(
 828         struct xfs_mount        *mp,
 829         int                     blocklen,
 830         int                     leaf)
 831 {
 832         blocklen -= XFS_BMBT_BLOCK_LEN(mp);
 833
 834         if (leaf)
 835                 return blocklen / sizeof(xfs_bmbt_rec_t);
 836         return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
 837 }
 838
 839 /*
 840  * Calculate number of records in a bmap btree inode root.
 841  */
 842 int
 843 xfs_bmdr_maxrecs(
 844         int                     blocklen,
 845         int                     leaf)
 846 {
 847         blocklen -= sizeof(xfs_bmdr_block_t);
 848
 849         if (leaf)
 850                 return blocklen / sizeof(xfs_bmdr_rec_t);
 851         return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
 852 }
 853
 854 /*
 855  * Change the owner of a btree format fork fo the inode passed in. Change it to
 856  * the owner of that is passed in so that we can change owners before or after
 857  * we switch forks between inodes. The operation that the caller is doing will
 858  * determine whether is needs to change owner before or after the switch.
 859  *
 860  * For demand paged transactional modification, the fork switch should be done
 861  * after reading in all the blocks, modifying them and pinning them in the
 862  * transaction. For modification when the buffers are already pinned in memory,
 863  * the fork switch can be done before changing the owner as we won't need to
 864  * validate the owner until the btree buffers are unpinned and writes can occur
 865  * again.
 866  *
 867  * For recovery based ownership change, there is no transactional context and
 868  * so a buffer list must be supplied so that we can record the buffers that we
 869  * modified for the caller to issue IO on.
 870  */
 871 int
 872 xfs_bmbt_change_owner(
 873         struct xfs_trans        *tp,
 874         struct xfs_inode        *ip,
 875         int                     whichfork,
 876         xfs_ino_t               new_owner,
 877         struct list_head        *buffer_list)
 878 {
 879         struct xfs_btree_cur    *cur;
 880         int                     error;
 881
 882         ASSERT(tp || buffer_list);
 883         ASSERT(!(tp && buffer_list));
 884         if (whichfork == XFS_DATA_FORK)
 885                 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
 886         else
 887                 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
 888
 889         cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
 890         if (!cur)
 891                 return -ENOMEM;
 892
 893         error = xfs_btree_change_owner(cur, new_owner, buffer_list);
 894         xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 895         return error;
 896 }