nilfs2: add read ahead mode to nilfs_btnode_submit_block
[cascardo/linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36         struct nilfs_btree_path *path;
37         int level = NILFS_BTREE_LEVEL_DATA;
38
39         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40         if (path == NULL)
41                 goto out;
42
43         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44                 path[level].bp_bh = NULL;
45                 path[level].bp_sib_bh = NULL;
46                 path[level].bp_index = 0;
47                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49                 path[level].bp_op = NULL;
50         }
51
52 out:
53         return path;
54 }
55
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58         int level = NILFS_BTREE_LEVEL_DATA;
59
60         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61                 brelse(path[level].bp_bh);
62
63         kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
70                                  struct buffer_head **bhp)
71 {
72         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
73         struct buffer_head *bh;
74         sector_t pbn = 0;
75         int err;
76
77         err = nilfs_btnode_submit_block(btnc, ptr, pbn, READ, bhp, &pbn);
78         if (err)
79                 return err == -EEXIST ? 0 : err;
80
81         bh = *bhp;
82         wait_on_buffer(bh);
83         if (!buffer_uptodate(bh)) {
84                 brelse(bh);
85                 return -EIO;
86         }
87         if (nilfs_btree_broken_node_block(bh)) {
88                 clear_buffer_uptodate(bh);
89                 brelse(bh);
90                 return -EINVAL;
91         }
92         return 0;
93 }
94
95 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
96                                      __u64 ptr, struct buffer_head **bhp)
97 {
98         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
99         struct buffer_head *bh;
100
101         bh = nilfs_btnode_create_block(btnc, ptr);
102         if (!bh)
103                 return -ENOMEM;
104
105         set_buffer_nilfs_volatile(bh);
106         *bhp = bh;
107         return 0;
108 }
109
110 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
111 {
112         return node->bn_flags;
113 }
114
115 static void
116 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
117 {
118         node->bn_flags = flags;
119 }
120
121 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
122 {
123         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
124 }
125
126 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
127 {
128         return node->bn_level;
129 }
130
131 static void
132 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
133 {
134         node->bn_level = level;
135 }
136
137 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
138 {
139         return le16_to_cpu(node->bn_nchildren);
140 }
141
142 static void
143 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
144 {
145         node->bn_nchildren = cpu_to_le16(nchildren);
146 }
147
148 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
149 {
150         return 1 << btree->b_inode->i_blkbits;
151 }
152
153 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
154 {
155         return btree->b_nchildren_per_block;
156 }
157
158 static __le64 *
159 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
160 {
161         return (__le64 *)((char *)(node + 1) +
162                           (nilfs_btree_node_root(node) ?
163                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
164 }
165
166 static __le64 *
167 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
168 {
169         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
170 }
171
172 static __u64
173 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
174 {
175         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
176 }
177
178 static void
179 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
180 {
181         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
182 }
183
184 static __u64
185 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
186                          int ncmax)
187 {
188         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
189 }
190
191 static void
192 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
193                          int ncmax)
194 {
195         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
196 }
197
198 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
199                                   int level, int nchildren, int ncmax,
200                                   const __u64 *keys, const __u64 *ptrs)
201 {
202         __le64 *dkeys;
203         __le64 *dptrs;
204         int i;
205
206         nilfs_btree_node_set_flags(node, flags);
207         nilfs_btree_node_set_level(node, level);
208         nilfs_btree_node_set_nchildren(node, nchildren);
209
210         dkeys = nilfs_btree_node_dkeys(node);
211         dptrs = nilfs_btree_node_dptrs(node, ncmax);
212         for (i = 0; i < nchildren; i++) {
213                 dkeys[i] = cpu_to_le64(keys[i]);
214                 dptrs[i] = cpu_to_le64(ptrs[i]);
215         }
216 }
217
218 /* Assume the buffer heads corresponding to left and right are locked. */
219 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
220                                        struct nilfs_btree_node *right,
221                                        int n, int lncmax, int rncmax)
222 {
223         __le64 *ldkeys, *rdkeys;
224         __le64 *ldptrs, *rdptrs;
225         int lnchildren, rnchildren;
226
227         ldkeys = nilfs_btree_node_dkeys(left);
228         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
229         lnchildren = nilfs_btree_node_get_nchildren(left);
230
231         rdkeys = nilfs_btree_node_dkeys(right);
232         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
233         rnchildren = nilfs_btree_node_get_nchildren(right);
234
235         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
236         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
237         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
238         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
239
240         lnchildren += n;
241         rnchildren -= n;
242         nilfs_btree_node_set_nchildren(left, lnchildren);
243         nilfs_btree_node_set_nchildren(right, rnchildren);
244 }
245
246 /* Assume that the buffer heads corresponding to left and right are locked. */
247 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
248                                         struct nilfs_btree_node *right,
249                                         int n, int lncmax, int rncmax)
250 {
251         __le64 *ldkeys, *rdkeys;
252         __le64 *ldptrs, *rdptrs;
253         int lnchildren, rnchildren;
254
255         ldkeys = nilfs_btree_node_dkeys(left);
256         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
257         lnchildren = nilfs_btree_node_get_nchildren(left);
258
259         rdkeys = nilfs_btree_node_dkeys(right);
260         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
261         rnchildren = nilfs_btree_node_get_nchildren(right);
262
263         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
264         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
265         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
266         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
267
268         lnchildren -= n;
269         rnchildren += n;
270         nilfs_btree_node_set_nchildren(left, lnchildren);
271         nilfs_btree_node_set_nchildren(right, rnchildren);
272 }
273
274 /* Assume that the buffer head corresponding to node is locked. */
275 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
276                                     __u64 key, __u64 ptr, int ncmax)
277 {
278         __le64 *dkeys;
279         __le64 *dptrs;
280         int nchildren;
281
282         dkeys = nilfs_btree_node_dkeys(node);
283         dptrs = nilfs_btree_node_dptrs(node, ncmax);
284         nchildren = nilfs_btree_node_get_nchildren(node);
285         if (index < nchildren) {
286                 memmove(dkeys + index + 1, dkeys + index,
287                         (nchildren - index) * sizeof(*dkeys));
288                 memmove(dptrs + index + 1, dptrs + index,
289                         (nchildren - index) * sizeof(*dptrs));
290         }
291         dkeys[index] = cpu_to_le64(key);
292         dptrs[index] = cpu_to_le64(ptr);
293         nchildren++;
294         nilfs_btree_node_set_nchildren(node, nchildren);
295 }
296
297 /* Assume that the buffer head corresponding to node is locked. */
298 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
299                                     __u64 *keyp, __u64 *ptrp, int ncmax)
300 {
301         __u64 key;
302         __u64 ptr;
303         __le64 *dkeys;
304         __le64 *dptrs;
305         int nchildren;
306
307         dkeys = nilfs_btree_node_dkeys(node);
308         dptrs = nilfs_btree_node_dptrs(node, ncmax);
309         key = le64_to_cpu(dkeys[index]);
310         ptr = le64_to_cpu(dptrs[index]);
311         nchildren = nilfs_btree_node_get_nchildren(node);
312         if (keyp != NULL)
313                 *keyp = key;
314         if (ptrp != NULL)
315                 *ptrp = ptr;
316
317         if (index < nchildren - 1) {
318                 memmove(dkeys + index, dkeys + index + 1,
319                         (nchildren - index - 1) * sizeof(*dkeys));
320                 memmove(dptrs + index, dptrs + index + 1,
321                         (nchildren - index - 1) * sizeof(*dptrs));
322         }
323         nchildren--;
324         nilfs_btree_node_set_nchildren(node, nchildren);
325 }
326
327 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
328                                    __u64 key, int *indexp)
329 {
330         __u64 nkey;
331         int index, low, high, s;
332
333         /* binary search */
334         low = 0;
335         high = nilfs_btree_node_get_nchildren(node) - 1;
336         index = 0;
337         s = 0;
338         while (low <= high) {
339                 index = (low + high) / 2;
340                 nkey = nilfs_btree_node_get_key(node, index);
341                 if (nkey == key) {
342                         s = 0;
343                         goto out;
344                 } else if (nkey < key) {
345                         low = index + 1;
346                         s = -1;
347                 } else {
348                         high = index - 1;
349                         s = 1;
350                 }
351         }
352
353         /* adjust index */
354         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
355                 if (s > 0 && index > 0)
356                         index--;
357         } else if (s < 0)
358                 index++;
359
360  out:
361         *indexp = index;
362
363         return s == 0;
364 }
365
366 /**
367  * nilfs_btree_node_broken - verify consistency of btree node
368  * @node: btree node block to be examined
369  * @size: node size (in bytes)
370  * @blocknr: block number
371  *
372  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
373  */
374 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
375                                    size_t size, sector_t blocknr)
376 {
377         int level, flags, nchildren;
378         int ret = 0;
379
380         level = nilfs_btree_node_get_level(node);
381         flags = nilfs_btree_node_get_flags(node);
382         nchildren = nilfs_btree_node_get_nchildren(node);
383
384         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
385                      level >= NILFS_BTREE_LEVEL_MAX ||
386                      (flags & NILFS_BTREE_NODE_ROOT) ||
387                      nchildren < 0 ||
388                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
389                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
390                        "level = %d, flags = 0x%x, nchildren = %d\n",
391                        (unsigned long long)blocknr, level, flags, nchildren);
392                 ret = 1;
393         }
394         return ret;
395 }
396
397 int nilfs_btree_broken_node_block(struct buffer_head *bh)
398 {
399         return nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
400                                        bh->b_size, bh->b_blocknr);
401 }
402
403 static struct nilfs_btree_node *
404 nilfs_btree_get_root(const struct nilfs_bmap *btree)
405 {
406         return (struct nilfs_btree_node *)btree->b_u.u_data;
407 }
408
409 static struct nilfs_btree_node *
410 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
411 {
412         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
417 {
418         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
419 }
420
/* Return the height of @btree, i.e. the level of its root node plus one. */
static int nilfs_btree_height(const struct nilfs_bmap *btree)
{
        const struct nilfs_btree_node *root = nilfs_btree_get_root(btree);

        return nilfs_btree_node_get_level(root) + 1;
}
425
426 static struct nilfs_btree_node *
427 nilfs_btree_get_node(const struct nilfs_bmap *btree,
428                      const struct nilfs_btree_path *path,
429                      int level, int *ncmaxp)
430 {
431         struct nilfs_btree_node *node;
432
433         if (level == nilfs_btree_height(btree) - 1) {
434                 node = nilfs_btree_get_root(btree);
435                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
436         } else {
437                 node = nilfs_btree_get_nonroot_node(path, level);
438                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
439         }
440         return node;
441 }
442
443 static int
444 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
445 {
446         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
447                 dump_stack();
448                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
449                        nilfs_btree_node_get_level(node), level);
450                 return 1;
451         }
452         return 0;
453 }
454
455 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
456                                  struct nilfs_btree_path *path,
457                                  __u64 key, __u64 *ptrp, int minlevel)
458 {
459         struct nilfs_btree_node *node;
460         __u64 ptr;
461         int level, index, found, ncmax, ret;
462
463         node = nilfs_btree_get_root(btree);
464         level = nilfs_btree_node_get_level(node);
465         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
466                 return -ENOENT;
467
468         found = nilfs_btree_node_lookup(node, key, &index);
469         ptr = nilfs_btree_node_get_ptr(node, index,
470                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
471         path[level].bp_bh = NULL;
472         path[level].bp_index = index;
473
474         ncmax = nilfs_btree_nchildren_per_block(btree);
475
476         for (level--; level >= minlevel; level--) {
477                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
478                 if (ret < 0)
479                         return ret;
480                 node = nilfs_btree_get_nonroot_node(path, level);
481                 if (nilfs_btree_bad_node(node, level))
482                         return -EINVAL;
483                 if (!found)
484                         found = nilfs_btree_node_lookup(node, key, &index);
485                 else
486                         index = 0;
487                 if (index < ncmax) {
488                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
489                 } else {
490                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
491                         /* insert */
492                         ptr = NILFS_BMAP_INVALID_PTR;
493                 }
494                 path[level].bp_index = index;
495         }
496         if (!found)
497                 return -ENOENT;
498
499         if (ptrp != NULL)
500                 *ptrp = ptr;
501
502         return 0;
503 }
504
505 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
506                                       struct nilfs_btree_path *path,
507                                       __u64 *keyp, __u64 *ptrp)
508 {
509         struct nilfs_btree_node *node;
510         __u64 ptr;
511         int index, level, ncmax, ret;
512
513         node = nilfs_btree_get_root(btree);
514         index = nilfs_btree_node_get_nchildren(node) - 1;
515         if (index < 0)
516                 return -ENOENT;
517         level = nilfs_btree_node_get_level(node);
518         ptr = nilfs_btree_node_get_ptr(node, index,
519                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
520         path[level].bp_bh = NULL;
521         path[level].bp_index = index;
522         ncmax = nilfs_btree_nchildren_per_block(btree);
523
524         for (level--; level > 0; level--) {
525                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
526                 if (ret < 0)
527                         return ret;
528                 node = nilfs_btree_get_nonroot_node(path, level);
529                 if (nilfs_btree_bad_node(node, level))
530                         return -EINVAL;
531                 index = nilfs_btree_node_get_nchildren(node) - 1;
532                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
533                 path[level].bp_index = index;
534         }
535
536         if (keyp != NULL)
537                 *keyp = nilfs_btree_node_get_key(node, index);
538         if (ptrp != NULL)
539                 *ptrp = ptr;
540
541         return 0;
542 }
543
544 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
545                               __u64 key, int level, __u64 *ptrp)
546 {
547         struct nilfs_btree_path *path;
548         int ret;
549
550         path = nilfs_btree_alloc_path();
551         if (path == NULL)
552                 return -ENOMEM;
553
554         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level);
555
556         nilfs_btree_free_path(path);
557
558         return ret;
559 }
560
561 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
562                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
563 {
564         struct nilfs_btree_path *path;
565         struct nilfs_btree_node *node;
566         struct inode *dat = NULL;
567         __u64 ptr, ptr2;
568         sector_t blocknr;
569         int level = NILFS_BTREE_LEVEL_NODE_MIN;
570         int ret, cnt, index, maxlevel, ncmax;
571
572         path = nilfs_btree_alloc_path();
573         if (path == NULL)
574                 return -ENOMEM;
575
576         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
577         if (ret < 0)
578                 goto out;
579
580         if (NILFS_BMAP_USE_VBN(btree)) {
581                 dat = nilfs_bmap_get_dat(btree);
582                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
583                 if (ret < 0)
584                         goto out;
585                 ptr = blocknr;
586         }
587         cnt = 1;
588         if (cnt == maxblocks)
589                 goto end;
590
591         maxlevel = nilfs_btree_height(btree) - 1;
592         node = nilfs_btree_get_node(btree, path, level, &ncmax);
593         index = path[level].bp_index + 1;
594         for (;;) {
595                 while (index < nilfs_btree_node_get_nchildren(node)) {
596                         if (nilfs_btree_node_get_key(node, index) !=
597                             key + cnt)
598                                 goto end;
599                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
600                         if (dat) {
601                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
602                                 if (ret < 0)
603                                         goto out;
604                                 ptr2 = blocknr;
605                         }
606                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
607                                 goto end;
608                         index++;
609                         continue;
610                 }
611                 if (level == maxlevel)
612                         break;
613
614                 /* look-up right sibling node */
615                 node = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
616                 index = path[level + 1].bp_index + 1;
617                 if (index >= nilfs_btree_node_get_nchildren(node) ||
618                     nilfs_btree_node_get_key(node, index) != key + cnt)
619                         break;
620                 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
621                 path[level + 1].bp_index = index;
622
623                 brelse(path[level].bp_bh);
624                 path[level].bp_bh = NULL;
625                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
626                 if (ret < 0)
627                         goto out;
628                 node = nilfs_btree_get_nonroot_node(path, level);
629                 ncmax = nilfs_btree_nchildren_per_block(btree);
630                 index = 0;
631                 path[level].bp_index = index;
632         }
633  end:
634         *ptrp = ptr;
635         ret = cnt;
636  out:
637         nilfs_btree_free_path(path);
638         return ret;
639 }
640
641 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
642                                     struct nilfs_btree_path *path,
643                                     int level, __u64 key)
644 {
645         if (level < nilfs_btree_height(btree) - 1) {
646                 do {
647                         nilfs_btree_node_set_key(
648                                 nilfs_btree_get_nonroot_node(path, level),
649                                 path[level].bp_index, key);
650                         if (!buffer_dirty(path[level].bp_bh))
651                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
652                 } while ((path[level].bp_index == 0) &&
653                          (++level < nilfs_btree_height(btree) - 1));
654         }
655
656         /* root */
657         if (level == nilfs_btree_height(btree) - 1) {
658                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
659                                          path[level].bp_index, key);
660         }
661 }
662
663 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
664                                   struct nilfs_btree_path *path,
665                                   int level, __u64 *keyp, __u64 *ptrp)
666 {
667         struct nilfs_btree_node *node;
668         int ncblk;
669
670         if (level < nilfs_btree_height(btree) - 1) {
671                 node = nilfs_btree_get_nonroot_node(path, level);
672                 ncblk = nilfs_btree_nchildren_per_block(btree);
673                 nilfs_btree_node_insert(node, path[level].bp_index,
674                                         *keyp, *ptrp, ncblk);
675                 if (!buffer_dirty(path[level].bp_bh))
676                         nilfs_btnode_mark_dirty(path[level].bp_bh);
677
678                 if (path[level].bp_index == 0)
679                         nilfs_btree_promote_key(btree, path, level + 1,
680                                                 nilfs_btree_node_get_key(node,
681                                                                          0));
682         } else {
683                 node = nilfs_btree_get_root(btree);
684                 nilfs_btree_node_insert(node, path[level].bp_index,
685                                         *keyp, *ptrp,
686                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
687         }
688 }
689
690 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
691                                    struct nilfs_btree_path *path,
692                                    int level, __u64 *keyp, __u64 *ptrp)
693 {
694         struct nilfs_btree_node *node, *left;
695         int nchildren, lnchildren, n, move, ncblk;
696
697         node = nilfs_btree_get_nonroot_node(path, level);
698         left = nilfs_btree_get_sib_node(path, level);
699         nchildren = nilfs_btree_node_get_nchildren(node);
700         lnchildren = nilfs_btree_node_get_nchildren(left);
701         ncblk = nilfs_btree_nchildren_per_block(btree);
702         move = 0;
703
704         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
705         if (n > path[level].bp_index) {
706                 /* move insert point */
707                 n--;
708                 move = 1;
709         }
710
711         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
712
713         if (!buffer_dirty(path[level].bp_bh))
714                 nilfs_btnode_mark_dirty(path[level].bp_bh);
715         if (!buffer_dirty(path[level].bp_sib_bh))
716                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
717
718         nilfs_btree_promote_key(btree, path, level + 1,
719                                 nilfs_btree_node_get_key(node, 0));
720
721         if (move) {
722                 brelse(path[level].bp_bh);
723                 path[level].bp_bh = path[level].bp_sib_bh;
724                 path[level].bp_sib_bh = NULL;
725                 path[level].bp_index += lnchildren;
726                 path[level + 1].bp_index--;
727         } else {
728                 brelse(path[level].bp_sib_bh);
729                 path[level].bp_sib_bh = NULL;
730                 path[level].bp_index -= n;
731         }
732
733         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
734 }
735
736 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
737                                     struct nilfs_btree_path *path,
738                                     int level, __u64 *keyp, __u64 *ptrp)
739 {
740         struct nilfs_btree_node *node, *right;
741         int nchildren, rnchildren, n, move, ncblk;
742
743         node = nilfs_btree_get_nonroot_node(path, level);
744         right = nilfs_btree_get_sib_node(path, level);
745         nchildren = nilfs_btree_node_get_nchildren(node);
746         rnchildren = nilfs_btree_node_get_nchildren(right);
747         ncblk = nilfs_btree_nchildren_per_block(btree);
748         move = 0;
749
750         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
751         if (n > nchildren - path[level].bp_index) {
752                 /* move insert point */
753                 n--;
754                 move = 1;
755         }
756
757         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
758
759         if (!buffer_dirty(path[level].bp_bh))
760                 nilfs_btnode_mark_dirty(path[level].bp_bh);
761         if (!buffer_dirty(path[level].bp_sib_bh))
762                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
763
764         path[level + 1].bp_index++;
765         nilfs_btree_promote_key(btree, path, level + 1,
766                                 nilfs_btree_node_get_key(right, 0));
767         path[level + 1].bp_index--;
768
769         if (move) {
770                 brelse(path[level].bp_bh);
771                 path[level].bp_bh = path[level].bp_sib_bh;
772                 path[level].bp_sib_bh = NULL;
773                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
774                 path[level + 1].bp_index++;
775         } else {
776                 brelse(path[level].bp_sib_bh);
777                 path[level].bp_sib_bh = NULL;
778         }
779
780         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
781 }
782
783 static void nilfs_btree_split(struct nilfs_bmap *btree,
784                               struct nilfs_btree_path *path,
785                               int level, __u64 *keyp, __u64 *ptrp)
786 {
787         struct nilfs_btree_node *node, *right;
788         __u64 newkey;
789         __u64 newptr;
790         int nchildren, n, move, ncblk;
791
792         node = nilfs_btree_get_nonroot_node(path, level);
793         right = nilfs_btree_get_sib_node(path, level);
794         nchildren = nilfs_btree_node_get_nchildren(node);
795         ncblk = nilfs_btree_nchildren_per_block(btree);
796         move = 0;
797
798         n = (nchildren + 1) / 2;
799         if (n > nchildren - path[level].bp_index) {
800                 n--;
801                 move = 1;
802         }
803
804         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
805
806         if (!buffer_dirty(path[level].bp_bh))
807                 nilfs_btnode_mark_dirty(path[level].bp_bh);
808         if (!buffer_dirty(path[level].bp_sib_bh))
809                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
810
811         newkey = nilfs_btree_node_get_key(right, 0);
812         newptr = path[level].bp_newreq.bpr_ptr;
813
814         if (move) {
815                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
816                 nilfs_btree_node_insert(right, path[level].bp_index,
817                                         *keyp, *ptrp, ncblk);
818
819                 *keyp = nilfs_btree_node_get_key(right, 0);
820                 *ptrp = path[level].bp_newreq.bpr_ptr;
821
822                 brelse(path[level].bp_bh);
823                 path[level].bp_bh = path[level].bp_sib_bh;
824                 path[level].bp_sib_bh = NULL;
825         } else {
826                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
827
828                 *keyp = nilfs_btree_node_get_key(right, 0);
829                 *ptrp = path[level].bp_newreq.bpr_ptr;
830
831                 brelse(path[level].bp_sib_bh);
832                 path[level].bp_sib_bh = NULL;
833         }
834
835         path[level + 1].bp_index++;
836 }
837
838 static void nilfs_btree_grow(struct nilfs_bmap *btree,
839                              struct nilfs_btree_path *path,
840                              int level, __u64 *keyp, __u64 *ptrp)
841 {
842         struct nilfs_btree_node *root, *child;
843         int n, ncblk;
844
845         root = nilfs_btree_get_root(btree);
846         child = nilfs_btree_get_sib_node(path, level);
847         ncblk = nilfs_btree_nchildren_per_block(btree);
848
849         n = nilfs_btree_node_get_nchildren(root);
850
851         nilfs_btree_node_move_right(root, child, n,
852                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
853         nilfs_btree_node_set_level(root, level + 1);
854
855         if (!buffer_dirty(path[level].bp_sib_bh))
856                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
857
858         path[level].bp_bh = path[level].bp_sib_bh;
859         path[level].bp_sib_bh = NULL;
860
861         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
862
863         *keyp = nilfs_btree_node_get_key(child, 0);
864         *ptrp = path[level].bp_newreq.bpr_ptr;
865 }
866
867 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
868                                    const struct nilfs_btree_path *path)
869 {
870         struct nilfs_btree_node *node;
871         int level, ncmax;
872
873         if (path == NULL)
874                 return NILFS_BMAP_INVALID_PTR;
875
876         /* left sibling */
877         level = NILFS_BTREE_LEVEL_NODE_MIN;
878         if (path[level].bp_index > 0) {
879                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
880                 return nilfs_btree_node_get_ptr(node,
881                                                 path[level].bp_index - 1,
882                                                 ncmax);
883         }
884
885         /* parent */
886         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
887         if (level <= nilfs_btree_height(btree) - 1) {
888                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
889                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
890                                                 ncmax);
891         }
892
893         return NILFS_BMAP_INVALID_PTR;
894 }
895
896 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
897                                        const struct nilfs_btree_path *path,
898                                        __u64 key)
899 {
900         __u64 ptr;
901
902         ptr = nilfs_bmap_find_target_seq(btree, key);
903         if (ptr != NILFS_BMAP_INVALID_PTR)
904                 /* sequential access */
905                 return ptr;
906         else {
907                 ptr = nilfs_btree_find_near(btree, path);
908                 if (ptr != NILFS_BMAP_INVALID_PTR)
909                         /* near */
910                         return ptr;
911         }
912         /* block group */
913         return nilfs_bmap_find_target_in_group(btree);
914 }
915
916 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
917                                       struct nilfs_btree_path *path,
918                                       int *levelp, __u64 key, __u64 ptr,
919                                       struct nilfs_bmap_stats *stats)
920 {
921         struct buffer_head *bh;
922         struct nilfs_btree_node *node, *parent, *sib;
923         __u64 sibptr;
924         int pindex, level, ncmax, ncblk, ret;
925         struct inode *dat = NULL;
926
927         stats->bs_nblocks = 0;
928         level = NILFS_BTREE_LEVEL_DATA;
929
930         /* allocate a new ptr for data block */
931         if (NILFS_BMAP_USE_VBN(btree)) {
932                 path[level].bp_newreq.bpr_ptr =
933                         nilfs_btree_find_target_v(btree, path, key);
934                 dat = nilfs_bmap_get_dat(btree);
935         }
936
937         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
938         if (ret < 0)
939                 goto err_out_data;
940
941         ncblk = nilfs_btree_nchildren_per_block(btree);
942
943         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
944              level < nilfs_btree_height(btree) - 1;
945              level++) {
946                 node = nilfs_btree_get_nonroot_node(path, level);
947                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
948                         path[level].bp_op = nilfs_btree_do_insert;
949                         stats->bs_nblocks++;
950                         goto out;
951                 }
952
953                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
954                 pindex = path[level + 1].bp_index;
955
956                 /* left sibling */
957                 if (pindex > 0) {
958                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
959                                                           ncmax);
960                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
961                         if (ret < 0)
962                                 goto err_out_child_node;
963                         sib = (struct nilfs_btree_node *)bh->b_data;
964                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
965                                 path[level].bp_sib_bh = bh;
966                                 path[level].bp_op = nilfs_btree_carry_left;
967                                 stats->bs_nblocks++;
968                                 goto out;
969                         } else {
970                                 brelse(bh);
971                         }
972                 }
973
974                 /* right sibling */
975                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
976                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
977                                                           ncmax);
978                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
979                         if (ret < 0)
980                                 goto err_out_child_node;
981                         sib = (struct nilfs_btree_node *)bh->b_data;
982                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
983                                 path[level].bp_sib_bh = bh;
984                                 path[level].bp_op = nilfs_btree_carry_right;
985                                 stats->bs_nblocks++;
986                                 goto out;
987                         } else {
988                                 brelse(bh);
989                         }
990                 }
991
992                 /* split */
993                 path[level].bp_newreq.bpr_ptr =
994                         path[level - 1].bp_newreq.bpr_ptr + 1;
995                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
996                                                    &path[level].bp_newreq, dat);
997                 if (ret < 0)
998                         goto err_out_child_node;
999                 ret = nilfs_btree_get_new_block(btree,
1000                                                 path[level].bp_newreq.bpr_ptr,
1001                                                 &bh);
1002                 if (ret < 0)
1003                         goto err_out_curr_node;
1004
1005                 stats->bs_nblocks++;
1006
1007                 sib = (struct nilfs_btree_node *)bh->b_data;
1008                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1009                 path[level].bp_sib_bh = bh;
1010                 path[level].bp_op = nilfs_btree_split;
1011         }
1012
1013         /* root */
1014         node = nilfs_btree_get_root(btree);
1015         if (nilfs_btree_node_get_nchildren(node) <
1016             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1017                 path[level].bp_op = nilfs_btree_do_insert;
1018                 stats->bs_nblocks++;
1019                 goto out;
1020         }
1021
1022         /* grow */
1023         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1024         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1025         if (ret < 0)
1026                 goto err_out_child_node;
1027         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1028                                         &bh);
1029         if (ret < 0)
1030                 goto err_out_curr_node;
1031
1032         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1033                               0, level, 0, ncblk, NULL, NULL);
1034         path[level].bp_sib_bh = bh;
1035         path[level].bp_op = nilfs_btree_grow;
1036
1037         level++;
1038         path[level].bp_op = nilfs_btree_do_insert;
1039
1040         /* a newly-created node block and a data block are added */
1041         stats->bs_nblocks += 2;
1042
1043         /* success */
1044  out:
1045         *levelp = level;
1046         return ret;
1047
1048         /* error */
1049  err_out_curr_node:
1050         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1051  err_out_child_node:
1052         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1053                 nilfs_btnode_delete(path[level].bp_sib_bh);
1054                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1055
1056         }
1057
1058         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1059  err_out_data:
1060         *levelp = level;
1061         stats->bs_nblocks = 0;
1062         return ret;
1063 }
1064
1065 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1066                                       struct nilfs_btree_path *path,
1067                                       int maxlevel, __u64 key, __u64 ptr)
1068 {
1069         struct inode *dat = NULL;
1070         int level;
1071
1072         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1073         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1074         if (NILFS_BMAP_USE_VBN(btree)) {
1075                 nilfs_bmap_set_target_v(btree, key, ptr);
1076                 dat = nilfs_bmap_get_dat(btree);
1077         }
1078
1079         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1080                 nilfs_bmap_commit_alloc_ptr(btree,
1081                                             &path[level - 1].bp_newreq, dat);
1082                 path[level].bp_op(btree, path, level, &key, &ptr);
1083         }
1084
1085         if (!nilfs_bmap_dirty(btree))
1086                 nilfs_bmap_set_dirty(btree);
1087 }
1088
1089 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1090 {
1091         struct nilfs_btree_path *path;
1092         struct nilfs_bmap_stats stats;
1093         int level, ret;
1094
1095         path = nilfs_btree_alloc_path();
1096         if (path == NULL)
1097                 return -ENOMEM;
1098
1099         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1100                                     NILFS_BTREE_LEVEL_NODE_MIN);
1101         if (ret != -ENOENT) {
1102                 if (ret == 0)
1103                         ret = -EEXIST;
1104                 goto out;
1105         }
1106
1107         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1108         if (ret < 0)
1109                 goto out;
1110         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1111         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1112
1113  out:
1114         nilfs_btree_free_path(path);
1115         return ret;
1116 }
1117
1118 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1119                                   struct nilfs_btree_path *path,
1120                                   int level, __u64 *keyp, __u64 *ptrp)
1121 {
1122         struct nilfs_btree_node *node;
1123         int ncblk;
1124
1125         if (level < nilfs_btree_height(btree) - 1) {
1126                 node = nilfs_btree_get_nonroot_node(path, level);
1127                 ncblk = nilfs_btree_nchildren_per_block(btree);
1128                 nilfs_btree_node_delete(node, path[level].bp_index,
1129                                         keyp, ptrp, ncblk);
1130                 if (!buffer_dirty(path[level].bp_bh))
1131                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1132                 if (path[level].bp_index == 0)
1133                         nilfs_btree_promote_key(btree, path, level + 1,
1134                                 nilfs_btree_node_get_key(node, 0));
1135         } else {
1136                 node = nilfs_btree_get_root(btree);
1137                 nilfs_btree_node_delete(node, path[level].bp_index,
1138                                         keyp, ptrp,
1139                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1140         }
1141 }
1142
1143 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1144                                     struct nilfs_btree_path *path,
1145                                     int level, __u64 *keyp, __u64 *ptrp)
1146 {
1147         struct nilfs_btree_node *node, *left;
1148         int nchildren, lnchildren, n, ncblk;
1149
1150         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1151
1152         node = nilfs_btree_get_nonroot_node(path, level);
1153         left = nilfs_btree_get_sib_node(path, level);
1154         nchildren = nilfs_btree_node_get_nchildren(node);
1155         lnchildren = nilfs_btree_node_get_nchildren(left);
1156         ncblk = nilfs_btree_nchildren_per_block(btree);
1157
1158         n = (nchildren + lnchildren) / 2 - nchildren;
1159
1160         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1161
1162         if (!buffer_dirty(path[level].bp_bh))
1163                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1164         if (!buffer_dirty(path[level].bp_sib_bh))
1165                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1166
1167         nilfs_btree_promote_key(btree, path, level + 1,
1168                                 nilfs_btree_node_get_key(node, 0));
1169
1170         brelse(path[level].bp_sib_bh);
1171         path[level].bp_sib_bh = NULL;
1172         path[level].bp_index += n;
1173 }
1174
1175 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1176                                      struct nilfs_btree_path *path,
1177                                      int level, __u64 *keyp, __u64 *ptrp)
1178 {
1179         struct nilfs_btree_node *node, *right;
1180         int nchildren, rnchildren, n, ncblk;
1181
1182         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1183
1184         node = nilfs_btree_get_nonroot_node(path, level);
1185         right = nilfs_btree_get_sib_node(path, level);
1186         nchildren = nilfs_btree_node_get_nchildren(node);
1187         rnchildren = nilfs_btree_node_get_nchildren(right);
1188         ncblk = nilfs_btree_nchildren_per_block(btree);
1189
1190         n = (nchildren + rnchildren) / 2 - nchildren;
1191
1192         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1193
1194         if (!buffer_dirty(path[level].bp_bh))
1195                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1196         if (!buffer_dirty(path[level].bp_sib_bh))
1197                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1198
1199         path[level + 1].bp_index++;
1200         nilfs_btree_promote_key(btree, path, level + 1,
1201                                 nilfs_btree_node_get_key(right, 0));
1202         path[level + 1].bp_index--;
1203
1204         brelse(path[level].bp_sib_bh);
1205         path[level].bp_sib_bh = NULL;
1206 }
1207
1208 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1209                                     struct nilfs_btree_path *path,
1210                                     int level, __u64 *keyp, __u64 *ptrp)
1211 {
1212         struct nilfs_btree_node *node, *left;
1213         int n, ncblk;
1214
1215         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1216
1217         node = nilfs_btree_get_nonroot_node(path, level);
1218         left = nilfs_btree_get_sib_node(path, level);
1219         ncblk = nilfs_btree_nchildren_per_block(btree);
1220
1221         n = nilfs_btree_node_get_nchildren(node);
1222
1223         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1224
1225         if (!buffer_dirty(path[level].bp_sib_bh))
1226                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1227
1228         nilfs_btnode_delete(path[level].bp_bh);
1229         path[level].bp_bh = path[level].bp_sib_bh;
1230         path[level].bp_sib_bh = NULL;
1231         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1232 }
1233
1234 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1235                                      struct nilfs_btree_path *path,
1236                                      int level, __u64 *keyp, __u64 *ptrp)
1237 {
1238         struct nilfs_btree_node *node, *right;
1239         int n, ncblk;
1240
1241         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1242
1243         node = nilfs_btree_get_nonroot_node(path, level);
1244         right = nilfs_btree_get_sib_node(path, level);
1245         ncblk = nilfs_btree_nchildren_per_block(btree);
1246
1247         n = nilfs_btree_node_get_nchildren(right);
1248
1249         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1250
1251         if (!buffer_dirty(path[level].bp_bh))
1252                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1253
1254         nilfs_btnode_delete(path[level].bp_sib_bh);
1255         path[level].bp_sib_bh = NULL;
1256         path[level + 1].bp_index++;
1257 }
1258
1259 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1260                                struct nilfs_btree_path *path,
1261                                int level, __u64 *keyp, __u64 *ptrp)
1262 {
1263         struct nilfs_btree_node *root, *child;
1264         int n, ncblk;
1265
1266         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1267
1268         root = nilfs_btree_get_root(btree);
1269         child = nilfs_btree_get_nonroot_node(path, level);
1270         ncblk = nilfs_btree_nchildren_per_block(btree);
1271
1272         nilfs_btree_node_delete(root, 0, NULL, NULL,
1273                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1274         nilfs_btree_node_set_level(root, level);
1275         n = nilfs_btree_node_get_nchildren(child);
1276         nilfs_btree_node_move_left(root, child, n,
1277                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1278
1279         nilfs_btnode_delete(path[level].bp_bh);
1280         path[level].bp_bh = NULL;
1281 }
1282
1283
1284 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1285                                       struct nilfs_btree_path *path,
1286                                       int *levelp,
1287                                       struct nilfs_bmap_stats *stats,
1288                                       struct inode *dat)
1289 {
1290         struct buffer_head *bh;
1291         struct nilfs_btree_node *node, *parent, *sib;
1292         __u64 sibptr;
1293         int pindex, level, ncmin, ncmax, ncblk, ret;
1294
1295         ret = 0;
1296         stats->bs_nblocks = 0;
1297         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1298         ncblk = nilfs_btree_nchildren_per_block(btree);
1299
1300         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1301              level < nilfs_btree_height(btree) - 1;
1302              level++) {
1303                 node = nilfs_btree_get_nonroot_node(path, level);
1304                 path[level].bp_oldreq.bpr_ptr =
1305                         nilfs_btree_node_get_ptr(node, path[level].bp_index,
1306                                                  ncblk);
1307                 ret = nilfs_bmap_prepare_end_ptr(btree,
1308                                                  &path[level].bp_oldreq, dat);
1309                 if (ret < 0)
1310                         goto err_out_child_node;
1311
1312                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1313                         path[level].bp_op = nilfs_btree_do_delete;
1314                         stats->bs_nblocks++;
1315                         goto out;
1316                 }
1317
1318                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1319                 pindex = path[level + 1].bp_index;
1320
1321                 if (pindex > 0) {
1322                         /* left sibling */
1323                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1324                                                           ncmax);
1325                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1326                         if (ret < 0)
1327                                 goto err_out_curr_node;
1328                         sib = (struct nilfs_btree_node *)bh->b_data;
1329                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1330                                 path[level].bp_sib_bh = bh;
1331                                 path[level].bp_op = nilfs_btree_borrow_left;
1332                                 stats->bs_nblocks++;
1333                                 goto out;
1334                         } else {
1335                                 path[level].bp_sib_bh = bh;
1336                                 path[level].bp_op = nilfs_btree_concat_left;
1337                                 stats->bs_nblocks++;
1338                                 /* continue; */
1339                         }
1340                 } else if (pindex <
1341                            nilfs_btree_node_get_nchildren(parent) - 1) {
1342                         /* right sibling */
1343                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1344                                                           ncmax);
1345                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1346                         if (ret < 0)
1347                                 goto err_out_curr_node;
1348                         sib = (struct nilfs_btree_node *)bh->b_data;
1349                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1350                                 path[level].bp_sib_bh = bh;
1351                                 path[level].bp_op = nilfs_btree_borrow_right;
1352                                 stats->bs_nblocks++;
1353                                 goto out;
1354                         } else {
1355                                 path[level].bp_sib_bh = bh;
1356                                 path[level].bp_op = nilfs_btree_concat_right;
1357                                 stats->bs_nblocks++;
1358                                 /* continue; */
1359                         }
1360                 } else {
1361                         /* no siblings */
1362                         /* the only child of the root node */
1363                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1364                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1365                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1366                                 path[level].bp_op = nilfs_btree_shrink;
1367                                 stats->bs_nblocks += 2;
1368                         } else {
1369                                 path[level].bp_op = nilfs_btree_do_delete;
1370                                 stats->bs_nblocks++;
1371                         }
1372
1373                         goto out;
1374
1375                 }
1376         }
1377
1378         node = nilfs_btree_get_root(btree);
1379         path[level].bp_oldreq.bpr_ptr =
1380                 nilfs_btree_node_get_ptr(node, path[level].bp_index,
1381                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1382
1383         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1384         if (ret < 0)
1385                 goto err_out_child_node;
1386
1387         /* child of the root node is deleted */
1388         path[level].bp_op = nilfs_btree_do_delete;
1389         stats->bs_nblocks++;
1390
1391         /* success */
1392  out:
1393         *levelp = level;
1394         return ret;
1395
1396         /* error */
1397  err_out_curr_node:
1398         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1399  err_out_child_node:
1400         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1401                 brelse(path[level].bp_sib_bh);
1402                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1403         }
1404         *levelp = level;
1405         stats->bs_nblocks = 0;
1406         return ret;
1407 }
1408
1409 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1410                                       struct nilfs_btree_path *path,
1411                                       int maxlevel, struct inode *dat)
1412 {
1413         int level;
1414
1415         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1416                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1417                 path[level].bp_op(btree, path, level, NULL, NULL);
1418         }
1419
1420         if (!nilfs_bmap_dirty(btree))
1421                 nilfs_bmap_set_dirty(btree);
1422 }
1423
1424 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1425
1426 {
1427         struct nilfs_btree_path *path;
1428         struct nilfs_bmap_stats stats;
1429         struct inode *dat;
1430         int level, ret;
1431
1432         path = nilfs_btree_alloc_path();
1433         if (path == NULL)
1434                 return -ENOMEM;
1435
1436         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1437                                     NILFS_BTREE_LEVEL_NODE_MIN);
1438         if (ret < 0)
1439                 goto out;
1440
1441
1442         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1443
1444         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1445         if (ret < 0)
1446                 goto out;
1447         nilfs_btree_commit_delete(btree, path, level, dat);
1448         nilfs_bmap_sub_blocks(btree, stats.bs_nblocks);
1449
1450 out:
1451         nilfs_btree_free_path(path);
1452         return ret;
1453 }
1454
1455 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1456 {
1457         struct nilfs_btree_path *path;
1458         int ret;
1459
1460         path = nilfs_btree_alloc_path();
1461         if (path == NULL)
1462                 return -ENOMEM;
1463
1464         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1465
1466         nilfs_btree_free_path(path);
1467
1468         return ret;
1469 }
1470
1471 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1472 {
1473         struct buffer_head *bh;
1474         struct nilfs_btree_node *root, *node;
1475         __u64 maxkey, nextmaxkey;
1476         __u64 ptr;
1477         int nchildren, ret;
1478
1479         root = nilfs_btree_get_root(btree);
1480         switch (nilfs_btree_height(btree)) {
1481         case 2:
1482                 bh = NULL;
1483                 node = root;
1484                 break;
1485         case 3:
1486                 nchildren = nilfs_btree_node_get_nchildren(root);
1487                 if (nchildren > 1)
1488                         return 0;
1489                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1490                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1491                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1492                 if (ret < 0)
1493                         return ret;
1494                 node = (struct nilfs_btree_node *)bh->b_data;
1495                 break;
1496         default:
1497                 return 0;
1498         }
1499
1500         nchildren = nilfs_btree_node_get_nchildren(node);
1501         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1502         nextmaxkey = (nchildren > 1) ?
1503                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1504         if (bh != NULL)
1505                 brelse(bh);
1506
1507         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1508 }
1509
1510 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1511                                    __u64 *keys, __u64 *ptrs, int nitems)
1512 {
1513         struct buffer_head *bh;
1514         struct nilfs_btree_node *node, *root;
1515         __le64 *dkeys;
1516         __le64 *dptrs;
1517         __u64 ptr;
1518         int nchildren, ncmax, i, ret;
1519
1520         root = nilfs_btree_get_root(btree);
1521         switch (nilfs_btree_height(btree)) {
1522         case 2:
1523                 bh = NULL;
1524                 node = root;
1525                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1526                 break;
1527         case 3:
1528                 nchildren = nilfs_btree_node_get_nchildren(root);
1529                 WARN_ON(nchildren > 1);
1530                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1531                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1532                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1533                 if (ret < 0)
1534                         return ret;
1535                 node = (struct nilfs_btree_node *)bh->b_data;
1536                 ncmax = nilfs_btree_nchildren_per_block(btree);
1537                 break;
1538         default:
1539                 node = NULL;
1540                 return -EINVAL;
1541         }
1542
1543         nchildren = nilfs_btree_node_get_nchildren(node);
1544         if (nchildren < nitems)
1545                 nitems = nchildren;
1546         dkeys = nilfs_btree_node_dkeys(node);
1547         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1548         for (i = 0; i < nitems; i++) {
1549                 keys[i] = le64_to_cpu(dkeys[i]);
1550                 ptrs[i] = le64_to_cpu(dptrs[i]);
1551         }
1552
1553         if (bh != NULL)
1554                 brelse(bh);
1555
1556         return nitems;
1557 }
1558
1559 static int
1560 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1561                                        union nilfs_bmap_ptr_req *dreq,
1562                                        union nilfs_bmap_ptr_req *nreq,
1563                                        struct buffer_head **bhp,
1564                                        struct nilfs_bmap_stats *stats)
1565 {
1566         struct buffer_head *bh;
1567         struct inode *dat = NULL;
1568         int ret;
1569
1570         stats->bs_nblocks = 0;
1571
1572         /* for data */
1573         /* cannot find near ptr */
1574         if (NILFS_BMAP_USE_VBN(btree)) {
1575                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1576                 dat = nilfs_bmap_get_dat(btree);
1577         }
1578
1579         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1580         if (ret < 0)
1581                 return ret;
1582
1583         *bhp = NULL;
1584         stats->bs_nblocks++;
1585         if (nreq != NULL) {
1586                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1587                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1588                 if (ret < 0)
1589                         goto err_out_dreq;
1590
1591                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1592                 if (ret < 0)
1593                         goto err_out_nreq;
1594
1595                 *bhp = bh;
1596                 stats->bs_nblocks++;
1597         }
1598
1599         /* success */
1600         return 0;
1601
1602         /* error */
1603  err_out_nreq:
1604         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1605  err_out_dreq:
1606         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1607         stats->bs_nblocks = 0;
1608         return ret;
1609
1610 }
1611
1612 static void
1613 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1614                                       __u64 key, __u64 ptr,
1615                                       const __u64 *keys, const __u64 *ptrs,
1616                                       int n,
1617                                       union nilfs_bmap_ptr_req *dreq,
1618                                       union nilfs_bmap_ptr_req *nreq,
1619                                       struct buffer_head *bh)
1620 {
1621         struct nilfs_btree_node *node;
1622         struct inode *dat;
1623         __u64 tmpptr;
1624         int ncblk;
1625
1626         /* free resources */
1627         if (btree->b_ops->bop_clear != NULL)
1628                 btree->b_ops->bop_clear(btree);
1629
1630         /* ptr must be a pointer to a buffer head. */
1631         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1632
1633         /* convert and insert */
1634         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1635         nilfs_btree_init(btree);
1636         if (nreq != NULL) {
1637                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1638                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1639
1640                 /* create child node at level 1 */
1641                 node = (struct nilfs_btree_node *)bh->b_data;
1642                 ncblk = nilfs_btree_nchildren_per_block(btree);
1643                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1644                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1645                 if (!buffer_dirty(bh))
1646                         nilfs_btnode_mark_dirty(bh);
1647                 if (!nilfs_bmap_dirty(btree))
1648                         nilfs_bmap_set_dirty(btree);
1649
1650                 brelse(bh);
1651
1652                 /* create root node at level 2 */
1653                 node = nilfs_btree_get_root(btree);
1654                 tmpptr = nreq->bpr_ptr;
1655                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1656                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1657                                       &keys[0], &tmpptr);
1658         } else {
1659                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1660
1661                 /* create root node at level 1 */
1662                 node = nilfs_btree_get_root(btree);
1663                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1664                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1665                                       keys, ptrs);
1666                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1667                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1668                 if (!nilfs_bmap_dirty(btree))
1669                         nilfs_bmap_set_dirty(btree);
1670         }
1671
1672         if (NILFS_BMAP_USE_VBN(btree))
1673                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1674 }
1675
1676 /**
1677  * nilfs_btree_convert_and_insert -
1678  * @bmap:
1679  * @key:
1680  * @ptr:
1681  * @keys:
1682  * @ptrs:
1683  * @n:
1684  */
1685 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1686                                    __u64 key, __u64 ptr,
1687                                    const __u64 *keys, const __u64 *ptrs, int n)
1688 {
1689         struct buffer_head *bh;
1690         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1691         struct nilfs_bmap_stats stats;
1692         int ret;
1693
1694         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1695                 di = &dreq;
1696                 ni = NULL;
1697         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1698                            1 << btree->b_inode->i_blkbits)) {
1699                 di = &dreq;
1700                 ni = &nreq;
1701         } else {
1702                 di = NULL;
1703                 ni = NULL;
1704                 BUG();
1705         }
1706
1707         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1708                                                      &stats);
1709         if (ret < 0)
1710                 return ret;
1711         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1712                                               di, ni, bh);
1713         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1714         return 0;
1715 }
1716
1717 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1718                                    struct nilfs_btree_path *path,
1719                                    int level,
1720                                    struct buffer_head *bh)
1721 {
1722         while ((++level < nilfs_btree_height(btree) - 1) &&
1723                !buffer_dirty(path[level].bp_bh))
1724                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1725
1726         return 0;
1727 }
1728
1729 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1730                                         struct nilfs_btree_path *path,
1731                                         int level, struct inode *dat)
1732 {
1733         struct nilfs_btree_node *parent;
1734         int ncmax, ret;
1735
1736         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1737         path[level].bp_oldreq.bpr_ptr =
1738                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1739                                          ncmax);
1740         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1741         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1742                                        &path[level].bp_newreq.bpr_req);
1743         if (ret < 0)
1744                 return ret;
1745
1746         if (buffer_nilfs_node(path[level].bp_bh)) {
1747                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1748                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1749                 path[level].bp_ctxt.bh = path[level].bp_bh;
1750                 ret = nilfs_btnode_prepare_change_key(
1751                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1752                         &path[level].bp_ctxt);
1753                 if (ret < 0) {
1754                         nilfs_dat_abort_update(dat,
1755                                                &path[level].bp_oldreq.bpr_req,
1756                                                &path[level].bp_newreq.bpr_req);
1757                         return ret;
1758                 }
1759         }
1760
1761         return 0;
1762 }
1763
1764 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1765                                         struct nilfs_btree_path *path,
1766                                         int level, struct inode *dat)
1767 {
1768         struct nilfs_btree_node *parent;
1769         int ncmax;
1770
1771         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1772                                 &path[level].bp_newreq.bpr_req,
1773                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1774
1775         if (buffer_nilfs_node(path[level].bp_bh)) {
1776                 nilfs_btnode_commit_change_key(
1777                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1778                         &path[level].bp_ctxt);
1779                 path[level].bp_bh = path[level].bp_ctxt.bh;
1780         }
1781         set_buffer_nilfs_volatile(path[level].bp_bh);
1782
1783         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1784         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1785                                  path[level].bp_newreq.bpr_ptr, ncmax);
1786 }
1787
1788 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1789                                        struct nilfs_btree_path *path,
1790                                        int level, struct inode *dat)
1791 {
1792         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1793                                &path[level].bp_newreq.bpr_req);
1794         if (buffer_nilfs_node(path[level].bp_bh))
1795                 nilfs_btnode_abort_change_key(
1796                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1797                         &path[level].bp_ctxt);
1798 }
1799
1800 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1801                                            struct nilfs_btree_path *path,
1802                                            int minlevel, int *maxlevelp,
1803                                            struct inode *dat)
1804 {
1805         int level, ret;
1806
1807         level = minlevel;
1808         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1809                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1810                 if (ret < 0)
1811                         return ret;
1812         }
1813         while ((++level < nilfs_btree_height(btree) - 1) &&
1814                !buffer_dirty(path[level].bp_bh)) {
1815
1816                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1817                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1818                 if (ret < 0)
1819                         goto out;
1820         }
1821
1822         /* success */
1823         *maxlevelp = level - 1;
1824         return 0;
1825
1826         /* error */
1827  out:
1828         while (--level > minlevel)
1829                 nilfs_btree_abort_update_v(btree, path, level, dat);
1830         if (!buffer_nilfs_volatile(path[level].bp_bh))
1831                 nilfs_btree_abort_update_v(btree, path, level, dat);
1832         return ret;
1833 }
1834
1835 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1836                                            struct nilfs_btree_path *path,
1837                                            int minlevel, int maxlevel,
1838                                            struct buffer_head *bh,
1839                                            struct inode *dat)
1840 {
1841         int level;
1842
1843         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1844                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1845
1846         for (level = minlevel + 1; level <= maxlevel; level++)
1847                 nilfs_btree_commit_update_v(btree, path, level, dat);
1848 }
1849
1850 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1851                                    struct nilfs_btree_path *path,
1852                                    int level, struct buffer_head *bh)
1853 {
1854         int maxlevel = 0, ret;
1855         struct nilfs_btree_node *parent;
1856         struct inode *dat = nilfs_bmap_get_dat(btree);
1857         __u64 ptr;
1858         int ncmax;
1859
1860         get_bh(bh);
1861         path[level].bp_bh = bh;
1862         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1863                                               dat);
1864         if (ret < 0)
1865                 goto out;
1866
1867         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1868                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1869                 ptr = nilfs_btree_node_get_ptr(parent,
1870                                                path[level + 1].bp_index,
1871                                                ncmax);
1872                 ret = nilfs_dat_mark_dirty(dat, ptr);
1873                 if (ret < 0)
1874                         goto out;
1875         }
1876
1877         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1878
1879  out:
1880         brelse(path[level].bp_bh);
1881         path[level].bp_bh = NULL;
1882         return ret;
1883 }
1884
1885 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1886                                  struct buffer_head *bh)
1887 {
1888         struct nilfs_btree_path *path;
1889         struct nilfs_btree_node *node;
1890         __u64 key;
1891         int level, ret;
1892
1893         WARN_ON(!buffer_dirty(bh));
1894
1895         path = nilfs_btree_alloc_path();
1896         if (path == NULL)
1897                 return -ENOMEM;
1898
1899         if (buffer_nilfs_node(bh)) {
1900                 node = (struct nilfs_btree_node *)bh->b_data;
1901                 key = nilfs_btree_node_get_key(node, 0);
1902                 level = nilfs_btree_node_get_level(node);
1903         } else {
1904                 key = nilfs_bmap_data_get_key(btree, bh);
1905                 level = NILFS_BTREE_LEVEL_DATA;
1906         }
1907
1908         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1909         if (ret < 0) {
1910                 if (unlikely(ret == -ENOENT))
1911                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1912                                __func__, (unsigned long long)key, level);
1913                 goto out;
1914         }
1915
1916         ret = NILFS_BMAP_USE_VBN(btree) ?
1917                 nilfs_btree_propagate_v(btree, path, level, bh) :
1918                 nilfs_btree_propagate_p(btree, path, level, bh);
1919
1920  out:
1921         nilfs_btree_free_path(path);
1922
1923         return ret;
1924 }
1925
1926 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
1927                                     struct buffer_head *bh)
1928 {
1929         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
1930 }
1931
1932 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
1933                                          struct list_head *lists,
1934                                          struct buffer_head *bh)
1935 {
1936         struct list_head *head;
1937         struct buffer_head *cbh;
1938         struct nilfs_btree_node *node, *cnode;
1939         __u64 key, ckey;
1940         int level;
1941
1942         get_bh(bh);
1943         node = (struct nilfs_btree_node *)bh->b_data;
1944         key = nilfs_btree_node_get_key(node, 0);
1945         level = nilfs_btree_node_get_level(node);
1946         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
1947             level >= NILFS_BTREE_LEVEL_MAX) {
1948                 dump_stack();
1949                 printk(KERN_WARNING
1950                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
1951                        "blocknr=%llu)\n",
1952                        __func__, level, (unsigned long long)key,
1953                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
1954                        (unsigned long long)bh->b_blocknr);
1955                 return;
1956         }
1957
1958         list_for_each(head, &lists[level]) {
1959                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1960                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1961                 ckey = nilfs_btree_node_get_key(cnode, 0);
1962                 if (key < ckey)
1963                         break;
1964         }
1965         list_add_tail(&bh->b_assoc_buffers, head);
1966 }
1967
1968 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
1969                                              struct list_head *listp)
1970 {
1971         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
1972         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1973         struct pagevec pvec;
1974         struct buffer_head *bh, *head;
1975         pgoff_t index = 0;
1976         int level, i;
1977
1978         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1979              level < NILFS_BTREE_LEVEL_MAX;
1980              level++)
1981                 INIT_LIST_HEAD(&lists[level]);
1982
1983         pagevec_init(&pvec, 0);
1984
1985         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1986                                   PAGEVEC_SIZE)) {
1987                 for (i = 0; i < pagevec_count(&pvec); i++) {
1988                         bh = head = page_buffers(pvec.pages[i]);
1989                         do {
1990                                 if (buffer_dirty(bh))
1991                                         nilfs_btree_add_dirty_buffer(btree,
1992                                                                      lists, bh);
1993                         } while ((bh = bh->b_this_page) != head);
1994                 }
1995                 pagevec_release(&pvec);
1996                 cond_resched();
1997         }
1998
1999         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2000              level < NILFS_BTREE_LEVEL_MAX;
2001              level++)
2002                 list_splice_tail(&lists[level], listp);
2003 }
2004
2005 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2006                                 struct nilfs_btree_path *path,
2007                                 int level,
2008                                 struct buffer_head **bh,
2009                                 sector_t blocknr,
2010                                 union nilfs_binfo *binfo)
2011 {
2012         struct nilfs_btree_node *parent;
2013         __u64 key;
2014         __u64 ptr;
2015         int ncmax, ret;
2016
2017         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2018         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2019                                        ncmax);
2020         if (buffer_nilfs_node(*bh)) {
2021                 path[level].bp_ctxt.oldkey = ptr;
2022                 path[level].bp_ctxt.newkey = blocknr;
2023                 path[level].bp_ctxt.bh = *bh;
2024                 ret = nilfs_btnode_prepare_change_key(
2025                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2026                         &path[level].bp_ctxt);
2027                 if (ret < 0)
2028                         return ret;
2029                 nilfs_btnode_commit_change_key(
2030                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2031                         &path[level].bp_ctxt);
2032                 *bh = path[level].bp_ctxt.bh;
2033         }
2034
2035         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2036                                  ncmax);
2037
2038         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2039         /* on-disk format */
2040         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2041         binfo->bi_dat.bi_level = level;
2042
2043         return 0;
2044 }
2045
2046 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2047                                 struct nilfs_btree_path *path,
2048                                 int level,
2049                                 struct buffer_head **bh,
2050                                 sector_t blocknr,
2051                                 union nilfs_binfo *binfo)
2052 {
2053         struct nilfs_btree_node *parent;
2054         struct inode *dat = nilfs_bmap_get_dat(btree);
2055         __u64 key;
2056         __u64 ptr;
2057         union nilfs_bmap_ptr_req req;
2058         int ncmax, ret;
2059
2060         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2061         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2062                                        ncmax);
2063         req.bpr_ptr = ptr;
2064         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2065         if (ret < 0)
2066                 return ret;
2067         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2068
2069         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2070         /* on-disk format */
2071         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2072         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2073
2074         return 0;
2075 }
2076
2077 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2078                               struct buffer_head **bh,
2079                               sector_t blocknr,
2080                               union nilfs_binfo *binfo)
2081 {
2082         struct nilfs_btree_path *path;
2083         struct nilfs_btree_node *node;
2084         __u64 key;
2085         int level, ret;
2086
2087         path = nilfs_btree_alloc_path();
2088         if (path == NULL)
2089                 return -ENOMEM;
2090
2091         if (buffer_nilfs_node(*bh)) {
2092                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2093                 key = nilfs_btree_node_get_key(node, 0);
2094                 level = nilfs_btree_node_get_level(node);
2095         } else {
2096                 key = nilfs_bmap_data_get_key(btree, *bh);
2097                 level = NILFS_BTREE_LEVEL_DATA;
2098         }
2099
2100         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2101         if (ret < 0) {
2102                 WARN_ON(ret == -ENOENT);
2103                 goto out;
2104         }
2105
2106         ret = NILFS_BMAP_USE_VBN(btree) ?
2107                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2108                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2109
2110  out:
2111         nilfs_btree_free_path(path);
2112
2113         return ret;
2114 }
2115
2116 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2117                                  struct buffer_head **bh,
2118                                  sector_t blocknr,
2119                                  union nilfs_binfo *binfo)
2120 {
2121         struct nilfs_btree_node *node;
2122         __u64 key;
2123         int ret;
2124
2125         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2126                              blocknr);
2127         if (ret < 0)
2128                 return ret;
2129
2130         if (buffer_nilfs_node(*bh)) {
2131                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2132                 key = nilfs_btree_node_get_key(node, 0);
2133         } else
2134                 key = nilfs_bmap_data_get_key(btree, *bh);
2135
2136         /* on-disk format */
2137         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2138         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2139
2140         return 0;
2141 }
2142
2143 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2144 {
2145         struct buffer_head *bh;
2146         struct nilfs_btree_path *path;
2147         __u64 ptr;
2148         int ret;
2149
2150         path = nilfs_btree_alloc_path();
2151         if (path == NULL)
2152                 return -ENOMEM;
2153
2154         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2155         if (ret < 0) {
2156                 WARN_ON(ret == -ENOENT);
2157                 goto out;
2158         }
2159         ret = nilfs_btree_get_block(btree, ptr, &bh);
2160         if (ret < 0) {
2161                 WARN_ON(ret == -ENOENT);
2162                 goto out;
2163         }
2164
2165         if (!buffer_dirty(bh))
2166                 nilfs_btnode_mark_dirty(bh);
2167         brelse(bh);
2168         if (!nilfs_bmap_dirty(btree))
2169                 nilfs_bmap_set_dirty(btree);
2170
2171  out:
2172         nilfs_btree_free_path(path);
2173         return ret;
2174 }
2175
2176 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2177         .bop_lookup             =       nilfs_btree_lookup,
2178         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2179         .bop_insert             =       nilfs_btree_insert,
2180         .bop_delete             =       nilfs_btree_delete,
2181         .bop_clear              =       NULL,
2182
2183         .bop_propagate          =       nilfs_btree_propagate,
2184
2185         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2186
2187         .bop_assign             =       nilfs_btree_assign,
2188         .bop_mark               =       nilfs_btree_mark,
2189
2190         .bop_last_key           =       nilfs_btree_last_key,
2191         .bop_check_insert       =       NULL,
2192         .bop_check_delete       =       nilfs_btree_check_delete,
2193         .bop_gather_data        =       nilfs_btree_gather_data,
2194 };
2195
2196 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2197         .bop_lookup             =       NULL,
2198         .bop_lookup_contig      =       NULL,
2199         .bop_insert             =       NULL,
2200         .bop_delete             =       NULL,
2201         .bop_clear              =       NULL,
2202
2203         .bop_propagate          =       nilfs_btree_propagate_gc,
2204
2205         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2206
2207         .bop_assign             =       nilfs_btree_assign_gc,
2208         .bop_mark               =       NULL,
2209
2210         .bop_last_key           =       NULL,
2211         .bop_check_insert       =       NULL,
2212         .bop_check_delete       =       NULL,
2213         .bop_gather_data        =       NULL,
2214 };
2215
2216 int nilfs_btree_init(struct nilfs_bmap *bmap)
2217 {
2218         bmap->b_ops = &nilfs_btree_ops;
2219         bmap->b_nchildren_per_block =
2220                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2221         return 0;
2222 }
2223
2224 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2225 {
2226         bmap->b_ops = &nilfs_btree_ops_gc;
2227         bmap->b_nchildren_per_block =
2228                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2229 }