[XFS] implement generic xfs_btree_rshift
[cascardo/linux.git] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_alloc.h"
40 #include "xfs_error.h"
41
42 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
43 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
44 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
47 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
48 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
49                 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
50
51 /*
52  * Single level of the xfs_inobt_delete record deletion routine.
53  * Delete record pointed to by cur/level.
54  * Remove the record from its block then rebalance the tree.
55  * Return 0 for error, 1 for done, 2 to go on to the next level.
56  */
57 STATIC int                              /* error */
58 xfs_inobt_delrec(
59         xfs_btree_cur_t         *cur,   /* btree cursor */
60         int                     level,  /* level removing record from */
61         int                     *stat)  /* fail/done/go-on */
62 {
63         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
64         xfs_mount_t             *mp;    /* mount structure */
65         xfs_agi_t               *agi;   /* allocation group inode header */
66         xfs_inobt_block_t       *block; /* btree block record/key lives in */
67         xfs_agblock_t           bno;    /* btree block number */
68         xfs_buf_t               *bp;    /* buffer for block */
69         int                     error;  /* error return value */
70         int                     i;      /* loop index */
71         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
72         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
73         xfs_agblock_t           lbno;   /* left block's block number */
74         xfs_buf_t               *lbp;   /* left block's buffer pointer */
75         xfs_inobt_block_t       *left;  /* left btree block */
76         xfs_inobt_key_t         *lkp;   /* left block key pointer */
77         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
78         int                     lrecs = 0;      /* number of records in left block */
79         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
80         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
81         int                     ptr;    /* index in btree block for this rec */
82         xfs_agblock_t           rbno;   /* right block's block number */
83         xfs_buf_t               *rbp;   /* right block's buffer pointer */
84         xfs_inobt_block_t       *right; /* right btree block */
85         xfs_inobt_key_t         *rkp;   /* right block key pointer */
86         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
87         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
88         int                     rrecs = 0;      /* number of records in right block */
89         int                     numrecs;
90         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
91         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
92
93         mp = cur->bc_mp;
94
95         /*
96          * Get the index of the entry being deleted, check for nothing there.
97          */
98         ptr = cur->bc_ptrs[level];
99         if (ptr == 0) {
100                 *stat = 0;
101                 return 0;
102         }
103
104         /*
105          * Get the buffer & block containing the record or key/ptr.
106          */
107         bp = cur->bc_bufs[level];
108         block = XFS_BUF_TO_INOBT_BLOCK(bp);
109 #ifdef DEBUG
110         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
111                 return error;
112 #endif
113         /*
114          * Fail if we're off the end of the block.
115          */
116
117         numrecs = be16_to_cpu(block->bb_numrecs);
118         if (ptr > numrecs) {
119                 *stat = 0;
120                 return 0;
121         }
122         /*
123          * It's a nonleaf.  Excise the key and ptr being deleted, by
124          * sliding the entries past them down one.
125          * Log the changed areas of the block.
126          */
127         if (level > 0) {
128                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
129                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
130 #ifdef DEBUG
131                 for (i = ptr; i < numrecs; i++) {
132                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
133                                 return error;
134                 }
135 #endif
136                 if (ptr < numrecs) {
137                         memmove(&kp[ptr - 1], &kp[ptr],
138                                 (numrecs - ptr) * sizeof(*kp));
139                         memmove(&pp[ptr - 1], &pp[ptr],
140                                 (numrecs - ptr) * sizeof(*kp));
141                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
142                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
143                 }
144         }
145         /*
146          * It's a leaf.  Excise the record being deleted, by sliding the
147          * entries past it down one.  Log the changed areas of the block.
148          */
149         else {
150                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
151                 if (ptr < numrecs) {
152                         memmove(&rp[ptr - 1], &rp[ptr],
153                                 (numrecs - ptr) * sizeof(*rp));
154                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
155                 }
156                 /*
157                  * If it's the first record in the block, we'll need a key
158                  * structure to pass up to the next level (updkey).
159                  */
160                 if (ptr == 1) {
161                         key.ir_startino = rp->ir_startino;
162                         kp = &key;
163                 }
164         }
165         /*
166          * Decrement and log the number of entries in the block.
167          */
168         numrecs--;
169         block->bb_numrecs = cpu_to_be16(numrecs);
170         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
171         /*
172          * Is this the root level?  If so, we're almost done.
173          */
174         if (level == cur->bc_nlevels - 1) {
175                 /*
176                  * If this is the root level,
177                  * and there's only one entry left,
178                  * and it's NOT the leaf level,
179                  * then we can get rid of this level.
180                  */
181                 if (numrecs == 1 && level > 0) {
182                         agbp = cur->bc_private.a.agbp;
183                         agi = XFS_BUF_TO_AGI(agbp);
184                         /*
185                          * pp is still set to the first pointer in the block.
186                          * Make it the new root of the btree.
187                          */
188                         bno = be32_to_cpu(agi->agi_root);
189                         agi->agi_root = *pp;
190                         be32_add_cpu(&agi->agi_level, -1);
191                         /*
192                          * Free the block.
193                          */
194                         if ((error = xfs_free_extent(cur->bc_tp,
195                                 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
196                                 return error;
197                         xfs_trans_binval(cur->bc_tp, bp);
198                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
199                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
200                         /*
201                          * Update the cursor so there's one fewer level.
202                          */
203                         cur->bc_bufs[level] = NULL;
204                         cur->bc_nlevels--;
205                 } else if (level > 0 &&
206                            (error = xfs_btree_decrement(cur, level, &i)))
207                         return error;
208                 *stat = 1;
209                 return 0;
210         }
211         /*
212          * If we deleted the leftmost entry in the block, update the
213          * key values above us in the tree.
214          */
215         if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1)))
216                 return error;
217         /*
218          * If the number of records remaining in the block is at least
219          * the minimum, we're done.
220          */
221         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
222                 if (level > 0 &&
223                     (error = xfs_btree_decrement(cur, level, &i)))
224                         return error;
225                 *stat = 1;
226                 return 0;
227         }
228         /*
229          * Otherwise, we have to move some records around to keep the
230          * tree balanced.  Look at the left and right sibling blocks to
231          * see if we can re-balance by moving only one record.
232          */
233         rbno = be32_to_cpu(block->bb_rightsib);
234         lbno = be32_to_cpu(block->bb_leftsib);
235         bno = NULLAGBLOCK;
236         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
237         /*
238          * Duplicate the cursor so our btree manipulations here won't
239          * disrupt the next level up.
240          */
241         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
242                 return error;
243         /*
244          * If there's a right sibling, see if it's ok to shift an entry
245          * out of it.
246          */
247         if (rbno != NULLAGBLOCK) {
248                 /*
249                  * Move the temp cursor to the last entry in the next block.
250                  * Actually any entry but the first would suffice.
251                  */
252                 i = xfs_btree_lastrec(tcur, level);
253                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
254                 if ((error = xfs_btree_increment(tcur, level, &i)))
255                         goto error0;
256                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
257                 i = xfs_btree_lastrec(tcur, level);
258                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
259                 /*
260                  * Grab a pointer to the block.
261                  */
262                 rbp = tcur->bc_bufs[level];
263                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
264 #ifdef DEBUG
265                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
266                         goto error0;
267 #endif
268                 /*
269                  * Grab the current block number, for future use.
270                  */
271                 bno = be32_to_cpu(right->bb_leftsib);
272                 /*
273                  * If right block is full enough so that removing one entry
274                  * won't make it too empty, and left-shifting an entry out
275                  * of right to us works, we're done.
276                  */
277                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
278                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
279                         if ((error = xfs_inobt_lshift(tcur, level, &i)))
280                                 goto error0;
281                         if (i) {
282                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
283                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
284                                 xfs_btree_del_cursor(tcur,
285                                                      XFS_BTREE_NOERROR);
286                                 if (level > 0 &&
287                                     (error = xfs_btree_decrement(cur, level,
288                                                 &i)))
289                                         return error;
290                                 *stat = 1;
291                                 return 0;
292                         }
293                 }
294                 /*
295                  * Otherwise, grab the number of records in right for
296                  * future reference, and fix up the temp cursor to point
297                  * to our block again (last record).
298                  */
299                 rrecs = be16_to_cpu(right->bb_numrecs);
300                 if (lbno != NULLAGBLOCK) {
301                         xfs_btree_firstrec(tcur, level);
302                         if ((error = xfs_btree_decrement(tcur, level, &i)))
303                                 goto error0;
304                 }
305         }
306         /*
307          * If there's a left sibling, see if it's ok to shift an entry
308          * out of it.
309          */
310         if (lbno != NULLAGBLOCK) {
311                 /*
312                  * Move the temp cursor to the first entry in the
313                  * previous block.
314                  */
315                 xfs_btree_firstrec(tcur, level);
316                 if ((error = xfs_btree_decrement(tcur, level, &i)))
317                         goto error0;
318                 xfs_btree_firstrec(tcur, level);
319                 /*
320                  * Grab a pointer to the block.
321                  */
322                 lbp = tcur->bc_bufs[level];
323                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
324 #ifdef DEBUG
325                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
326                         goto error0;
327 #endif
328                 /*
329                  * Grab the current block number, for future use.
330                  */
331                 bno = be32_to_cpu(left->bb_rightsib);
332                 /*
333                  * If left block is full enough so that removing one entry
334                  * won't make it too empty, and right-shifting an entry out
335                  * of left to us works, we're done.
336                  */
337                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
338                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
339                         if ((error = xfs_btree_rshift(tcur, level, &i)))
340                                 goto error0;
341                         if (i) {
342                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
343                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
344                                 xfs_btree_del_cursor(tcur,
345                                                      XFS_BTREE_NOERROR);
346                                 if (level == 0)
347                                         cur->bc_ptrs[0]++;
348                                 *stat = 1;
349                                 return 0;
350                         }
351                 }
352                 /*
353                  * Otherwise, grab the number of records in right for
354                  * future reference.
355                  */
356                 lrecs = be16_to_cpu(left->bb_numrecs);
357         }
358         /*
359          * Delete the temp cursor, we're done with it.
360          */
361         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
362         /*
363          * If here, we need to do a join to keep the tree balanced.
364          */
365         ASSERT(bno != NULLAGBLOCK);
366         /*
367          * See if we can join with the left neighbor block.
368          */
369         if (lbno != NULLAGBLOCK &&
370             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
371                 /*
372                  * Set "right" to be the starting block,
373                  * "left" to be the left neighbor.
374                  */
375                 rbno = bno;
376                 right = block;
377                 rrecs = be16_to_cpu(right->bb_numrecs);
378                 rbp = bp;
379                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
380                                 cur->bc_private.a.agno, lbno, 0, &lbp,
381                                 XFS_INO_BTREE_REF)))
382                         return error;
383                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
384                 lrecs = be16_to_cpu(left->bb_numrecs);
385                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
386                         return error;
387         }
388         /*
389          * If that won't work, see if we can join with the right neighbor block.
390          */
391         else if (rbno != NULLAGBLOCK &&
392                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
393                 /*
394                  * Set "left" to be the starting block,
395                  * "right" to be the right neighbor.
396                  */
397                 lbno = bno;
398                 left = block;
399                 lrecs = be16_to_cpu(left->bb_numrecs);
400                 lbp = bp;
401                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
402                                 cur->bc_private.a.agno, rbno, 0, &rbp,
403                                 XFS_INO_BTREE_REF)))
404                         return error;
405                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
406                 rrecs = be16_to_cpu(right->bb_numrecs);
407                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
408                         return error;
409         }
410         /*
411          * Otherwise, we can't fix the imbalance.
412          * Just return.  This is probably a logic error, but it's not fatal.
413          */
414         else {
415                 if (level > 0 && (error = xfs_btree_decrement(cur, level, &i)))
416                         return error;
417                 *stat = 1;
418                 return 0;
419         }
420         /*
421          * We're now going to join "left" and "right" by moving all the stuff
422          * in "right" to "left" and deleting "right".
423          */
424         if (level > 0) {
425                 /*
426                  * It's a non-leaf.  Move keys and pointers.
427                  */
428                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
429                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
430                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
431                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
432 #ifdef DEBUG
433                 for (i = 0; i < rrecs; i++) {
434                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
435                                 return error;
436                 }
437 #endif
438                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
439                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
440                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
441                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
442         } else {
443                 /*
444                  * It's a leaf.  Move records.
445                  */
446                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
447                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
448                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
449                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
450         }
451         /*
452          * If we joined with the left neighbor, set the buffer in the
453          * cursor to the left block, and fix up the index.
454          */
455         if (bp != lbp) {
456                 xfs_btree_setbuf(cur, level, lbp);
457                 cur->bc_ptrs[level] += lrecs;
458         }
459         /*
460          * If we joined with the right neighbor and there's a level above
461          * us, increment the cursor at that level.
462          */
463         else if (level + 1 < cur->bc_nlevels &&
464                  (error = xfs_btree_increment(cur, level + 1, &i)))
465                 return error;
466         /*
467          * Fix up the number of records in the surviving block.
468          */
469         lrecs += rrecs;
470         left->bb_numrecs = cpu_to_be16(lrecs);
471         /*
472          * Fix up the right block pointer in the surviving block, and log it.
473          */
474         left->bb_rightsib = right->bb_rightsib;
475         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
476         /*
477          * If there is a right sibling now, make it point to the
478          * remaining block.
479          */
480         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
481                 xfs_inobt_block_t       *rrblock;
482                 xfs_buf_t               *rrbp;
483
484                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
485                                 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
486                                 &rrbp, XFS_INO_BTREE_REF)))
487                         return error;
488                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
489                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
490                         return error;
491                 rrblock->bb_leftsib = cpu_to_be32(lbno);
492                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
493         }
494         /*
495          * Free the deleting block.
496          */
497         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
498                                      cur->bc_private.a.agno, rbno), 1)))
499                 return error;
500         xfs_trans_binval(cur->bc_tp, rbp);
501         /*
502          * Readjust the ptr at this level if it's not a leaf, since it's
503          * still pointing at the deletion point, which makes the cursor
504          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
505          * We can't use decrement because it would change the next level up.
506          */
507         if (level > 0)
508                 cur->bc_ptrs[level]--;
509         /*
510          * Return value means the next level up has something to do.
511          */
512         *stat = 2;
513         return 0;
514
515 error0:
516         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
517         return error;
518 }
519
520 /*
521  * Insert one record/level.  Return information to the caller
522  * allowing the next level up to proceed if necessary.
523  */
524 STATIC int                              /* error */
525 xfs_inobt_insrec(
526         xfs_btree_cur_t         *cur,   /* btree cursor */
527         int                     level,  /* level to insert record at */
528         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
529         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
530         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
531         int                     *stat)  /* success/failure */
532 {
533         xfs_inobt_block_t       *block; /* btree block record/key lives in */
534         xfs_buf_t               *bp;    /* buffer for block */
535         int                     error;  /* error return value */
536         int                     i;      /* loop index */
537         xfs_inobt_key_t         key;    /* key value being inserted */
538         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
539         xfs_agblock_t           nbno;   /* block number of allocated block */
540         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
541         xfs_inobt_key_t         nkey;   /* new key value, from split */
542         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
543         int                     numrecs;
544         int                     optr;   /* old ptr value */
545         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
546         int                     ptr;    /* index in btree block for this rec */
547         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
548
549         /*
550          * GCC doesn't understand the (arguably complex) control flow in
551          * this function and complains about uninitialized structure fields
552          * without this.
553          */
554         memset(&nrec, 0, sizeof(nrec));
555
556         /*
557          * If we made it to the root level, allocate a new root block
558          * and we're done.
559          */
560         if (level >= cur->bc_nlevels) {
561                 error = xfs_inobt_newroot(cur, &i);
562                 *bnop = NULLAGBLOCK;
563                 *stat = i;
564                 return error;
565         }
566         /*
567          * Make a key out of the record data to be inserted, and save it.
568          */
569         key.ir_startino = recp->ir_startino;
570         optr = ptr = cur->bc_ptrs[level];
571         /*
572          * If we're off the left edge, return failure.
573          */
574         if (ptr == 0) {
575                 *stat = 0;
576                 return 0;
577         }
578         /*
579          * Get pointers to the btree buffer and block.
580          */
581         bp = cur->bc_bufs[level];
582         block = XFS_BUF_TO_INOBT_BLOCK(bp);
583         numrecs = be16_to_cpu(block->bb_numrecs);
584 #ifdef DEBUG
585         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
586                 return error;
587         /*
588          * Check that the new entry is being inserted in the right place.
589          */
590         if (ptr <= numrecs) {
591                 if (level == 0) {
592                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
593                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
594                 } else {
595                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
596                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
597                 }
598         }
599 #endif
600         nbno = NULLAGBLOCK;
601         ncur = NULL;
602         /*
603          * If the block is full, we can't insert the new entry until we
604          * make the block un-full.
605          */
606         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
607                 /*
608                  * First, try shifting an entry to the right neighbor.
609                  */
610                 if ((error = xfs_btree_rshift(cur, level, &i)))
611                         return error;
612                 if (i) {
613                         /* nothing */
614                 }
615                 /*
616                  * Next, try shifting an entry to the left neighbor.
617                  */
618                 else {
619                         if ((error = xfs_inobt_lshift(cur, level, &i)))
620                                 return error;
621                         if (i) {
622                                 optr = ptr = cur->bc_ptrs[level];
623                         } else {
624                                 /*
625                                  * Next, try splitting the current block
626                                  * in half. If this works we have to
627                                  * re-set our variables because
628                                  * we could be in a different block now.
629                                  */
630                                 if ((error = xfs_inobt_split(cur, level, &nbno,
631                                                 &nkey, &ncur, &i)))
632                                         return error;
633                                 if (i) {
634                                         bp = cur->bc_bufs[level];
635                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
636 #ifdef DEBUG
637                                         if ((error = xfs_btree_check_sblock(cur,
638                                                         block, level, bp)))
639                                                 return error;
640 #endif
641                                         ptr = cur->bc_ptrs[level];
642                                         nrec.ir_startino = nkey.ir_startino;
643                                 } else {
644                                         /*
645                                          * Otherwise the insert fails.
646                                          */
647                                         *stat = 0;
648                                         return 0;
649                                 }
650                         }
651                 }
652         }
653         /*
654          * At this point we know there's room for our new entry in the block
655          * we're pointing at.
656          */
657         numrecs = be16_to_cpu(block->bb_numrecs);
658         if (level > 0) {
659                 /*
660                  * It's a non-leaf entry.  Make a hole for the new data
661                  * in the key and ptr regions of the block.
662                  */
663                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
664                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
665 #ifdef DEBUG
666                 for (i = numrecs; i >= ptr; i--) {
667                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
668                                 return error;
669                 }
670 #endif
671                 memmove(&kp[ptr], &kp[ptr - 1],
672                         (numrecs - ptr + 1) * sizeof(*kp));
673                 memmove(&pp[ptr], &pp[ptr - 1],
674                         (numrecs - ptr + 1) * sizeof(*pp));
675                 /*
676                  * Now stuff the new data in, bump numrecs and log the new data.
677                  */
678 #ifdef DEBUG
679                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
680                         return error;
681 #endif
682                 kp[ptr - 1] = key;
683                 pp[ptr - 1] = cpu_to_be32(*bnop);
684                 numrecs++;
685                 block->bb_numrecs = cpu_to_be16(numrecs);
686                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
687                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
688         } else {
689                 /*
690                  * It's a leaf entry.  Make a hole for the new record.
691                  */
692                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
693                 memmove(&rp[ptr], &rp[ptr - 1],
694                         (numrecs - ptr + 1) * sizeof(*rp));
695                 /*
696                  * Now stuff the new record in, bump numrecs
697                  * and log the new data.
698                  */
699                 rp[ptr - 1] = *recp;
700                 numrecs++;
701                 block->bb_numrecs = cpu_to_be16(numrecs);
702                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
703         }
704         /*
705          * Log the new number of records in the btree header.
706          */
707         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
708 #ifdef DEBUG
709         /*
710          * Check that the key/record is in the right place, now.
711          */
712         if (ptr < numrecs) {
713                 if (level == 0)
714                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
715                                 rp + ptr);
716                 else
717                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
718                                 kp + ptr);
719         }
720 #endif
721         /*
722          * If we inserted at the start of a block, update the parents' keys.
723          */
724         if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1)))
725                 return error;
726         /*
727          * Return the new block number, if any.
728          * If there is one, give back a record value and a cursor too.
729          */
730         *bnop = nbno;
731         if (nbno != NULLAGBLOCK) {
732                 *recp = nrec;
733                 *curp = ncur;
734         }
735         *stat = 1;
736         return 0;
737 }
738
739 /*
740  * Log header fields from a btree block.
741  */
742 STATIC void
743 xfs_inobt_log_block(
744         xfs_trans_t             *tp,    /* transaction pointer */
745         xfs_buf_t               *bp,    /* buffer containing btree block */
746         int                     fields) /* mask of fields: XFS_BB_... */
747 {
748         int                     first;  /* first byte offset logged */
749         int                     last;   /* last byte offset logged */
750         static const short      offsets[] = {   /* table of offsets */
751                 offsetof(xfs_inobt_block_t, bb_magic),
752                 offsetof(xfs_inobt_block_t, bb_level),
753                 offsetof(xfs_inobt_block_t, bb_numrecs),
754                 offsetof(xfs_inobt_block_t, bb_leftsib),
755                 offsetof(xfs_inobt_block_t, bb_rightsib),
756                 sizeof(xfs_inobt_block_t)
757         };
758
759         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
760         xfs_trans_log_buf(tp, bp, first, last);
761 }
762
763 /*
764  * Log keys from a btree block (nonleaf).
765  */
766 STATIC void
767 xfs_inobt_log_keys(
768         xfs_btree_cur_t         *cur,   /* btree cursor */
769         xfs_buf_t               *bp,    /* buffer containing btree block */
770         int                     kfirst, /* index of first key to log */
771         int                     klast)  /* index of last key to log */
772 {
773         xfs_inobt_block_t       *block; /* btree block to log from */
774         int                     first;  /* first byte offset logged */
775         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
776         int                     last;   /* last byte offset logged */
777
778         block = XFS_BUF_TO_INOBT_BLOCK(bp);
779         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
780         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
781         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
782         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
783 }
784
785 /*
786  * Log block pointer fields from a btree block (nonleaf).
787  */
788 STATIC void
789 xfs_inobt_log_ptrs(
790         xfs_btree_cur_t         *cur,   /* btree cursor */
791         xfs_buf_t               *bp,    /* buffer containing btree block */
792         int                     pfirst, /* index of first pointer to log */
793         int                     plast)  /* index of last pointer to log */
794 {
795         xfs_inobt_block_t       *block; /* btree block to log from */
796         int                     first;  /* first byte offset logged */
797         int                     last;   /* last byte offset logged */
798         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
799
800         block = XFS_BUF_TO_INOBT_BLOCK(bp);
801         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
802         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
803         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
804         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
805 }
806
807 /*
808  * Log records from a btree block (leaf).
809  */
810 STATIC void
811 xfs_inobt_log_recs(
812         xfs_btree_cur_t         *cur,   /* btree cursor */
813         xfs_buf_t               *bp,    /* buffer containing btree block */
814         int                     rfirst, /* index of first record to log */
815         int                     rlast)  /* index of last record to log */
816 {
817         xfs_inobt_block_t       *block; /* btree block to log from */
818         int                     first;  /* first byte offset logged */
819         int                     last;   /* last byte offset logged */
820         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
821
822         block = XFS_BUF_TO_INOBT_BLOCK(bp);
823         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
824         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
825         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
826         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
827 }
828
829 /*
830  * Move 1 record left from cur/level if possible.
831  * Update cur to reflect the new path.
832  */
833 STATIC int                              /* error */
834 xfs_inobt_lshift(
835         xfs_btree_cur_t         *cur,   /* btree cursor */
836         int                     level,  /* level to shift record on */
837         int                     *stat)  /* success/failure */
838 {
839         int                     error;  /* error return value */
840 #ifdef DEBUG
841         int                     i;      /* loop index */
842 #endif
843         xfs_inobt_key_t         key;    /* key value for leaf level upward */
844         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
845         xfs_inobt_block_t       *left;  /* left neighbor btree block */
846         xfs_inobt_key_t         *lkp=NULL;      /* key pointer for left block */
847         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
848         xfs_inobt_rec_t         *lrp=NULL;      /* record pointer for left block */
849         int                     nrec;   /* new number of left block entries */
850         xfs_buf_t               *rbp;   /* buffer for right (current) block */
851         xfs_inobt_block_t       *right; /* right (current) btree block */
852         xfs_inobt_key_t         *rkp=NULL;      /* key pointer for right block */
853         xfs_inobt_ptr_t         *rpp=NULL;      /* address pointer for right block */
854         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
855
856         /*
857          * Set up variables for this block as "right".
858          */
859         rbp = cur->bc_bufs[level];
860         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
861 #ifdef DEBUG
862         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
863                 return error;
864 #endif
865         /*
866          * If we've got no left sibling then we can't shift an entry left.
867          */
868         if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
869                 *stat = 0;
870                 return 0;
871         }
872         /*
873          * If the cursor entry is the one that would be moved, don't
874          * do it... it's too complicated.
875          */
876         if (cur->bc_ptrs[level] <= 1) {
877                 *stat = 0;
878                 return 0;
879         }
880         /*
881          * Set up the left neighbor as "left".
882          */
883         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
884                         cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
885                         0, &lbp, XFS_INO_BTREE_REF)))
886                 return error;
887         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
888         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
889                 return error;
890         /*
891          * If it's full, it can't take another entry.
892          */
893         if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
894                 *stat = 0;
895                 return 0;
896         }
897         nrec = be16_to_cpu(left->bb_numrecs) + 1;
898         /*
899          * If non-leaf, copy a key and a ptr to the left block.
900          */
901         if (level > 0) {
902                 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
903                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
904                 *lkp = *rkp;
905                 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
906                 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
907                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
908 #ifdef DEBUG
909                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
910                         return error;
911 #endif
912                 *lpp = *rpp;
913                 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
914         }
915         /*
916          * If leaf, copy a record to the left block.
917          */
918         else {
919                 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
920                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
921                 *lrp = *rrp;
922                 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
923         }
924         /*
925          * Bump and log left's numrecs, decrement and log right's numrecs.
926          */
927         be16_add_cpu(&left->bb_numrecs, 1);
928         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
929 #ifdef DEBUG
930         if (level > 0)
931                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
932         else
933                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
934 #endif
935         be16_add_cpu(&right->bb_numrecs, -1);
936         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
937         /*
938          * Slide the contents of right down one entry.
939          */
940         if (level > 0) {
941 #ifdef DEBUG
942                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
943                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
944                                         level)))
945                                 return error;
946                 }
947 #endif
948                 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
949                 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
950                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
951                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
952         } else {
953                 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
954                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
955                 key.ir_startino = rrp->ir_startino;
956                 rkp = &key;
957         }
958         /*
959          * Update the parent key values of right.
960          */
961         if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1)))
962                 return error;
963         /*
964          * Slide the cursor value left one.
965          */
966         cur->bc_ptrs[level]--;
967         *stat = 1;
968         return 0;
969 }
970
971 /*
972  * Allocate a new root block, fill it in.
973  */
974 STATIC int                              /* error */
975 xfs_inobt_newroot(
976         xfs_btree_cur_t         *cur,   /* btree cursor */
977         int                     *stat)  /* success/failure */
978 {
979         xfs_agi_t               *agi;   /* a.g. inode header */
980         xfs_alloc_arg_t         args;   /* allocation argument structure */
981         xfs_inobt_block_t       *block; /* one half of the old root block */
982         xfs_buf_t               *bp;    /* buffer containing block */
983         int                     error;  /* error return value */
984         xfs_inobt_key_t         *kp;    /* btree key pointer */
985         xfs_agblock_t           lbno;   /* left block number */
986         xfs_buf_t               *lbp;   /* left buffer pointer */
987         xfs_inobt_block_t       *left;  /* left btree block */
988         xfs_buf_t               *nbp;   /* new (root) buffer */
989         xfs_inobt_block_t       *new;   /* new (root) btree block */
990         int                     nptr;   /* new value for key index, 1 or 2 */
991         xfs_inobt_ptr_t         *pp;    /* btree address pointer */
992         xfs_agblock_t           rbno;   /* right block number */
993         xfs_buf_t               *rbp;   /* right buffer pointer */
994         xfs_inobt_block_t       *right; /* right btree block */
995         xfs_inobt_rec_t         *rp;    /* btree record pointer */
996
997         ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
998
999         /*
1000          * Get a block & a buffer.
1001          */
1002         agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1003         args.tp = cur->bc_tp;
1004         args.mp = cur->bc_mp;
1005         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
1006                 be32_to_cpu(agi->agi_root));
1007         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1008                 args.isfl = args.userdata = args.minalignslop = 0;
1009         args.minlen = args.maxlen = args.prod = 1;
1010         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1011         if ((error = xfs_alloc_vextent(&args)))
1012                 return error;
1013         /*
1014          * None available, we fail.
1015          */
1016         if (args.fsbno == NULLFSBLOCK) {
1017                 *stat = 0;
1018                 return 0;
1019         }
1020         ASSERT(args.len == 1);
1021         nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1022         new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1023         /*
1024          * Set the root data in the a.g. inode structure.
1025          */
1026         agi->agi_root = cpu_to_be32(args.agbno);
1027         be32_add_cpu(&agi->agi_level, 1);
1028         xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1029                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1030         /*
1031          * At the previous root level there are now two blocks: the old
1032          * root, and the new block generated when it was split.
1033          * We don't know which one the cursor is pointing at, so we
1034          * set up variables "left" and "right" for each case.
1035          */
1036         bp = cur->bc_bufs[cur->bc_nlevels - 1];
1037         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1038 #ifdef DEBUG
1039         if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1040                 return error;
1041 #endif
1042         if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1043                 /*
1044                  * Our block is left, pick up the right block.
1045                  */
1046                 lbp = bp;
1047                 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1048                 left = block;
1049                 rbno = be32_to_cpu(left->bb_rightsib);
1050                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1051                                 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1052                         return error;
1053                 bp = rbp;
1054                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1055                 if ((error = xfs_btree_check_sblock(cur, right,
1056                                 cur->bc_nlevels - 1, rbp)))
1057                         return error;
1058                 nptr = 1;
1059         } else {
1060                 /*
1061                  * Our block is right, pick up the left block.
1062                  */
1063                 rbp = bp;
1064                 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1065                 right = block;
1066                 lbno = be32_to_cpu(right->bb_leftsib);
1067                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1068                                 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1069                         return error;
1070                 bp = lbp;
1071                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1072                 if ((error = xfs_btree_check_sblock(cur, left,
1073                                 cur->bc_nlevels - 1, lbp)))
1074                         return error;
1075                 nptr = 2;
1076         }
1077         /*
1078          * Fill in the new block's btree header and log it.
1079          */
1080         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1081         new->bb_level = cpu_to_be16(cur->bc_nlevels);
1082         new->bb_numrecs = cpu_to_be16(2);
1083         new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1084         new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1085         xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1086         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1087         /*
1088          * Fill in the key data in the new root.
1089          */
1090         kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1091         if (be16_to_cpu(left->bb_level) > 0) {
1092                 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1093                 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1094         } else {
1095                 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1096                 kp[0].ir_startino = rp->ir_startino;
1097                 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1098                 kp[1].ir_startino = rp->ir_startino;
1099         }
1100         xfs_inobt_log_keys(cur, nbp, 1, 2);
1101         /*
1102          * Fill in the pointer data in the new root.
1103          */
1104         pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1105         pp[0] = cpu_to_be32(lbno);
1106         pp[1] = cpu_to_be32(rbno);
1107         xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1108         /*
1109          * Fix up the cursor.
1110          */
1111         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1112         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1113         cur->bc_nlevels++;
1114         *stat = 1;
1115         return 0;
1116 }
1117
1118 /*
1119  * Split cur/level block in half.
1120  * Return new block number and its first record (to be inserted into parent).
1121  */
1122 STATIC int                              /* error */
1123 xfs_inobt_split(
1124         xfs_btree_cur_t         *cur,   /* btree cursor */
1125         int                     level,  /* level to split */
1126         xfs_agblock_t           *bnop,  /* output: block number allocated */
1127         xfs_inobt_key_t         *keyp,  /* output: first key of new block */
1128         xfs_btree_cur_t         **curp, /* output: new cursor */
1129         int                     *stat)  /* success/failure */
1130 {
1131         xfs_alloc_arg_t         args;   /* allocation argument structure */
1132         int                     error;  /* error return value */
1133         int                     i;      /* loop index/record number */
1134         xfs_agblock_t           lbno;   /* left (current) block number */
1135         xfs_buf_t               *lbp;   /* buffer for left block */
1136         xfs_inobt_block_t       *left;  /* left (current) btree block */
1137         xfs_inobt_key_t         *lkp;   /* left btree key pointer */
1138         xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
1139         xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
1140         xfs_buf_t               *rbp;   /* buffer for right block */
1141         xfs_inobt_block_t       *right; /* right (new) btree block */
1142         xfs_inobt_key_t         *rkp;   /* right btree key pointer */
1143         xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
1144         xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
1145
1146         /*
1147          * Set up left block (current one).
1148          */
1149         lbp = cur->bc_bufs[level];
1150         args.tp = cur->bc_tp;
1151         args.mp = cur->bc_mp;
1152         lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1153         /*
1154          * Allocate the new block.
1155          * If we can't do it, we're toast.  Give up.
1156          */
1157         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1158         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1159                 args.isfl = args.userdata = args.minalignslop = 0;
1160         args.minlen = args.maxlen = args.prod = 1;
1161         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1162         if ((error = xfs_alloc_vextent(&args)))
1163                 return error;
1164         if (args.fsbno == NULLFSBLOCK) {
1165                 *stat = 0;
1166                 return 0;
1167         }
1168         ASSERT(args.len == 1);
1169         rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1170         /*
1171          * Set up the new block as "right".
1172          */
1173         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1174         /*
1175          * "Left" is the current (according to the cursor) block.
1176          */
1177         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1178 #ifdef DEBUG
1179         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1180                 return error;
1181 #endif
1182         /*
1183          * Fill in the btree header for the new block.
1184          */
1185         right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1186         right->bb_level = left->bb_level;
1187         right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1188         /*
1189          * Make sure that if there's an odd number of entries now, that
1190          * each new block will have the same number of entries.
1191          */
1192         if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1193             cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1194                 be16_add_cpu(&right->bb_numrecs, 1);
1195         i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1196         /*
1197          * For non-leaf blocks, copy keys and addresses over to the new block.
1198          */
1199         if (level > 0) {
1200                 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1201                 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1202                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1203                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1204 #ifdef DEBUG
1205                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1206                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1207                                 return error;
1208                 }
1209 #endif
1210                 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1211                 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1212                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1213                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1214                 *keyp = *rkp;
1215         }
1216         /*
1217          * For leaf blocks, copy records over to the new block.
1218          */
1219         else {
1220                 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1221                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1222                 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1223                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1224                 keyp->ir_startino = rrp->ir_startino;
1225         }
1226         /*
1227          * Find the left block number by looking in the buffer.
1228          * Adjust numrecs, sibling pointers.
1229          */
1230         be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1231         right->bb_rightsib = left->bb_rightsib;
1232         left->bb_rightsib = cpu_to_be32(args.agbno);
1233         right->bb_leftsib = cpu_to_be32(lbno);
1234         xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1235         xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1236         /*
1237          * If there's a block to the new block's right, make that block
1238          * point back to right instead of to left.
1239          */
1240         if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1241                 xfs_inobt_block_t       *rrblock;       /* rr btree block */
1242                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1243
1244                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1245                                 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1246                                 XFS_INO_BTREE_REF)))
1247                         return error;
1248                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1249                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1250                         return error;
1251                 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1252                 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1253         }
1254         /*
1255          * If the cursor is really in the right block, move it there.
1256          * If it's just pointing past the last entry in left, then we'll
1257          * insert there, so don't change anything in that case.
1258          */
1259         if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1260                 xfs_btree_setbuf(cur, level, rbp);
1261                 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1262         }
1263         /*
1264          * If there are more levels, we'll need another cursor which refers
1265          * the right block, no matter where this cursor was.
1266          */
1267         if (level + 1 < cur->bc_nlevels) {
1268                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1269                         return error;
1270                 (*curp)->bc_ptrs[level + 1]++;
1271         }
1272         *bnop = args.agbno;
1273         *stat = 1;
1274         return 0;
1275 }
1276
1277 /*
1278  * Externally visible routines.
1279  */
1280
1281 /*
1282  * Delete the record pointed to by cur.
1283  * The cursor refers to the place where the record was (could be inserted)
1284  * when the operation returns.
1285  */
1286 int                                     /* error */
1287 xfs_inobt_delete(
1288         xfs_btree_cur_t *cur,           /* btree cursor */
1289         int             *stat)          /* success/failure */
1290 {
1291         int             error;
1292         int             i;              /* result code */
1293         int             level;          /* btree level */
1294
1295         /*
1296          * Go up the tree, starting at leaf level.
1297          * If 2 is returned then a join was done; go to the next level.
1298          * Otherwise we are done.
1299          */
1300         for (level = 0, i = 2; i == 2; level++) {
1301                 if ((error = xfs_inobt_delrec(cur, level, &i)))
1302                         return error;
1303         }
1304         if (i == 0) {
1305                 for (level = 1; level < cur->bc_nlevels; level++) {
1306                         if (cur->bc_ptrs[level] == 0) {
1307                                 if ((error = xfs_btree_decrement(cur, level, &i)))
1308                                         return error;
1309                                 break;
1310                         }
1311                 }
1312         }
1313         *stat = i;
1314         return 0;
1315 }
1316
1317
1318 /*
1319  * Get the data from the pointed-to record.
1320  */
1321 int                                     /* error */
1322 xfs_inobt_get_rec(
1323         xfs_btree_cur_t         *cur,   /* btree cursor */
1324         xfs_agino_t             *ino,   /* output: starting inode of chunk */
1325         __int32_t               *fcnt,  /* output: number of free inodes */
1326         xfs_inofree_t           *free,  /* output: free inode mask */
1327         int                     *stat)  /* output: success/failure */
1328 {
1329         xfs_inobt_block_t       *block; /* btree block */
1330         xfs_buf_t               *bp;    /* buffer containing btree block */
1331 #ifdef DEBUG
1332         int                     error;  /* error return value */
1333 #endif
1334         int                     ptr;    /* record number */
1335         xfs_inobt_rec_t         *rec;   /* record data */
1336
1337         bp = cur->bc_bufs[0];
1338         ptr = cur->bc_ptrs[0];
1339         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1340 #ifdef DEBUG
1341         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1342                 return error;
1343 #endif
1344         /*
1345          * Off the right end or left end, return failure.
1346          */
1347         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1348                 *stat = 0;
1349                 return 0;
1350         }
1351         /*
1352          * Point to the record and extract its data.
1353          */
1354         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1355         *ino = be32_to_cpu(rec->ir_startino);
1356         *fcnt = be32_to_cpu(rec->ir_freecount);
1357         *free = be64_to_cpu(rec->ir_free);
1358         *stat = 1;
1359         return 0;
1360 }
1361
1362 /*
1363  * Insert the current record at the point referenced by cur.
1364  * The cursor may be inconsistent on return if splits have been done.
1365  */
1366 int                                     /* error */
1367 xfs_inobt_insert(
1368         xfs_btree_cur_t *cur,           /* btree cursor */
1369         int             *stat)          /* success/failure */
1370 {
1371         int             error;          /* error return value */
1372         int             i;              /* result value, 0 for failure */
1373         int             level;          /* current level number in btree */
1374         xfs_agblock_t   nbno;           /* new block number (split result) */
1375         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
1376         xfs_inobt_rec_t nrec;           /* record being inserted this level */
1377         xfs_btree_cur_t *pcur;          /* previous level's cursor */
1378
1379         level = 0;
1380         nbno = NULLAGBLOCK;
1381         nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1382         nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1383         nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1384         ncur = NULL;
1385         pcur = cur;
1386         /*
1387          * Loop going up the tree, starting at the leaf level.
1388          * Stop when we don't get a split block, that must mean that
1389          * the insert is finished with this level.
1390          */
1391         do {
1392                 /*
1393                  * Insert nrec/nbno into this level of the tree.
1394                  * Note if we fail, nbno will be null.
1395                  */
1396                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1397                                 &i))) {
1398                         if (pcur != cur)
1399                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1400                         return error;
1401                 }
1402                 /*
1403                  * See if the cursor we just used is trash.
1404                  * Can't trash the caller's cursor, but otherwise we should
1405                  * if ncur is a new cursor or we're about to be done.
1406                  */
1407                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1408                         cur->bc_nlevels = pcur->bc_nlevels;
1409                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1410                 }
1411                 /*
1412                  * If we got a new cursor, switch to it.
1413                  */
1414                 if (ncur) {
1415                         pcur = ncur;
1416                         ncur = NULL;
1417                 }
1418         } while (nbno != NULLAGBLOCK);
1419         *stat = i;
1420         return 0;
1421 }
1422
1423 STATIC struct xfs_btree_cur *
1424 xfs_inobt_dup_cursor(
1425         struct xfs_btree_cur    *cur)
1426 {
1427         return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
1428                         cur->bc_private.a.agbp, cur->bc_private.a.agno);
1429 }
1430
1431 STATIC int
1432 xfs_inobt_get_maxrecs(
1433         struct xfs_btree_cur    *cur,
1434         int                     level)
1435 {
1436         return cur->bc_mp->m_inobt_mxr[level != 0];
1437 }
1438
1439 STATIC void
1440 xfs_inobt_init_key_from_rec(
1441         union xfs_btree_key     *key,
1442         union xfs_btree_rec     *rec)
1443 {
1444         key->inobt.ir_startino = rec->inobt.ir_startino;
1445 }
1446
1447 /*
1448  * intial value of ptr for lookup
1449  */
1450 STATIC void
1451 xfs_inobt_init_ptr_from_cur(
1452         struct xfs_btree_cur    *cur,
1453         union xfs_btree_ptr     *ptr)
1454 {
1455         struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1456
1457         ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
1458
1459         ptr->s = agi->agi_root;
1460 }
1461
1462 STATIC __int64_t
1463 xfs_inobt_key_diff(
1464         struct xfs_btree_cur    *cur,
1465         union xfs_btree_key     *key)
1466 {
1467         return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
1468                           cur->bc_rec.i.ir_startino;
1469 }
1470
1471 #ifdef XFS_BTREE_TRACE
1472 ktrace_t        *xfs_inobt_trace_buf;
1473
1474 STATIC void
1475 xfs_inobt_trace_enter(
1476         struct xfs_btree_cur    *cur,
1477         const char              *func,
1478         char                    *s,
1479         int                     type,
1480         int                     line,
1481         __psunsigned_t          a0,
1482         __psunsigned_t          a1,
1483         __psunsigned_t          a2,
1484         __psunsigned_t          a3,
1485         __psunsigned_t          a4,
1486         __psunsigned_t          a5,
1487         __psunsigned_t          a6,
1488         __psunsigned_t          a7,
1489         __psunsigned_t          a8,
1490         __psunsigned_t          a9,
1491         __psunsigned_t          a10)
1492 {
1493         ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
1494                 (void *)func, (void *)s, NULL, (void *)cur,
1495                 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1496                 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1497                 (void *)a8, (void *)a9, (void *)a10);
1498 }
1499
1500 STATIC void
1501 xfs_inobt_trace_cursor(
1502         struct xfs_btree_cur    *cur,
1503         __uint32_t              *s0,
1504         __uint64_t              *l0,
1505         __uint64_t              *l1)
1506 {
1507         *s0 = cur->bc_private.a.agno;
1508         *l0 = cur->bc_rec.i.ir_startino;
1509         *l1 = cur->bc_rec.i.ir_free;
1510 }
1511
1512 STATIC void
1513 xfs_inobt_trace_key(
1514         struct xfs_btree_cur    *cur,
1515         union xfs_btree_key     *key,
1516         __uint64_t              *l0,
1517         __uint64_t              *l1)
1518 {
1519         *l0 = be32_to_cpu(key->inobt.ir_startino);
1520         *l1 = 0;
1521 }
1522
1523 STATIC void
1524 xfs_inobt_trace_record(
1525         struct xfs_btree_cur    *cur,
1526         union xfs_btree_rec     *rec,
1527         __uint64_t              *l0,
1528         __uint64_t              *l1,
1529         __uint64_t              *l2)
1530 {
1531         *l0 = be32_to_cpu(rec->inobt.ir_startino);
1532         *l1 = be32_to_cpu(rec->inobt.ir_freecount);
1533         *l2 = be64_to_cpu(rec->inobt.ir_free);
1534 }
1535 #endif /* XFS_BTREE_TRACE */
1536
1537 static const struct xfs_btree_ops xfs_inobt_ops = {
1538         .rec_len                = sizeof(xfs_inobt_rec_t),
1539         .key_len                = sizeof(xfs_inobt_key_t),
1540
1541         .dup_cursor             = xfs_inobt_dup_cursor,
1542         .get_maxrecs            = xfs_inobt_get_maxrecs,
1543         .init_key_from_rec      = xfs_inobt_init_key_from_rec,
1544         .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
1545         .key_diff               = xfs_inobt_key_diff,
1546
1547 #ifdef XFS_BTREE_TRACE
1548         .trace_enter            = xfs_inobt_trace_enter,
1549         .trace_cursor           = xfs_inobt_trace_cursor,
1550         .trace_key              = xfs_inobt_trace_key,
1551         .trace_record           = xfs_inobt_trace_record,
1552 #endif
1553 };
1554
1555 /*
1556  * Allocate a new inode btree cursor.
1557  */
1558 struct xfs_btree_cur *                          /* new inode btree cursor */
1559 xfs_inobt_init_cursor(
1560         struct xfs_mount        *mp,            /* file system mount point */
1561         struct xfs_trans        *tp,            /* transaction pointer */
1562         struct xfs_buf          *agbp,          /* buffer for agi structure */
1563         xfs_agnumber_t          agno)           /* allocation group number */
1564 {
1565         struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
1566         struct xfs_btree_cur    *cur;
1567
1568         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
1569
1570         cur->bc_tp = tp;
1571         cur->bc_mp = mp;
1572         cur->bc_nlevels = be32_to_cpu(agi->agi_level);
1573         cur->bc_btnum = XFS_BTNUM_INO;
1574         cur->bc_blocklog = mp->m_sb.sb_blocklog;
1575
1576         cur->bc_ops = &xfs_inobt_ops;
1577
1578         cur->bc_private.a.agbp = agbp;
1579         cur->bc_private.a.agno = agno;
1580
1581         return cur;
1582 }