2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
6 * Now we have all buffers that must be used in balancing of the tree
7 * Further calculations can not cause schedule(), and thus the buffer
8 * tree will be stable until the balancing will be finished
9 * balance the tree according to the analysis made before,
10 * and using buffers obtained after all above.
13 #include <asm/uaccess.h>
14 #include <linux/time.h>
16 #include <linux/buffer_head.h>
17 #include <linux/kernel.h>
19 static inline void buffer_info_init_left(struct tree_balance *tb,
20 struct buffer_info *bi)
24 bi->bi_parent = tb->FL[0];
25 bi->bi_position = get_left_neighbor_position(tb, 0);
28 static inline void buffer_info_init_right(struct tree_balance *tb,
29 struct buffer_info *bi)
33 bi->bi_parent = tb->FR[0];
34 bi->bi_position = get_right_neighbor_position(tb, 0);
37 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
38 struct buffer_info *bi)
41 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
42 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
43 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
46 static inline void buffer_info_init_bh(struct tree_balance *tb,
47 struct buffer_info *bi,
48 struct buffer_head *bh)
56 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
57 struct buffer_head *bh, int flag)
59 journal_mark_dirty(tb->transaction_handle,
60 tb->transaction_handle->t_super, bh);
63 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
64 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
68 * if deleting something ( tb->insert_size[0] < 0 )
69 * return(balance_leaf_when_delete()); (flag d handled here)
71 * if lnum is larger than 0 we put items into the left node
72 * if rnum is larger than 0 we put items into the right node
73 * if snum1 is larger than 0 we put items into the new node s1
74 * if snum2 is larger than 0 we put items into the new node s2
75 * Note that all *num* count new items being created.
77 * It would be easier to read balance_leaf() if each of these summary
78 * lines was a separate procedure rather than being inlined. I think
79 * that there are many passages here and in balance_leaf_when_delete() in
80 * which two calls to one procedure can replace two passages, and it
81 * might save cache space and improve software maintenance costs to do so.
83 * Vladimir made the perceptive comment that we should offload most of
84 * the decision making in this function into fix_nodes/check_balance, and
85 * then create some sort of structure in tb that says what actions should
86 * be performed by do_balance.
92 * Balance leaf node in case of delete or cut: insert_size[0] < 0
94 * lnum, rnum can have values >= -1
95 * -1 means that the neighbor must be joined with S
96 * 0 means that nothing should be done with the neighbor
97 * >0 means to shift entirely or partly the specified number of items
/*
 * Balance the leaf S[0] after a delete (M_DELETE) or cut (M_CUT), i.e.
 * when tb->insert_size[0] < 0.  First performs the delete/cut in S[0],
 * then joins or shifts items between S[0] and its neighbors L[0]/R[0]
 * according to tb->lnum[0]/tb->rnum[0] (-1 = join, 0 = untouched,
 * >0 = shift that many items).
 * NOTE(review): this extract is missing lines from the original file
 * (several braces, arguments and statements are absent) — treat it as a
 * partial listing, not compilable code.
 */
100 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
103 int item_pos = PATH_LAST_POSITION(tb->tb_path);
104 int pos_in_item = tb->tb_path->pos_in_item;
105 struct buffer_info bi;
107 struct item_head *ih;
109 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
110 "vs- 12000: level: wrong FR %z", tb->FR[0]);
111 RFALSE(tb->blknum[0] > 1,
112 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
113 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
114 "PAP-12010: tree can not be empty");
116 ih = item_head(tbS0, item_pos);
117 buffer_info_init_tbS0(tb, &bi);
119 /* Delete or truncate the item */
122 case M_DELETE:	/* delete item in S[0] */
124 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
125 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
126 -tb->insert_size[0], ih);
128 leaf_delete_items(&bi, 0, item_pos, 1, -1);
/* Deleting item 0 invalidates the left delimiting key: refresh it. */
130 if (!item_pos && tb->CFL[0]) {
131 if (B_NR_ITEMS(tbS0)) {
132 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
135 if (!PATH_H_POSITION(tb->tb_path, 1))
136 replace_key(tb, tb->CFL[0], tb->lkey[0],
137 PATH_H_PPARENT(tb->tb_path,
142 RFALSE(!item_pos && !tb->CFL[0],
143 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
148 case M_CUT:{	/* cut item in S[0] */
149 if (is_direntry_le_ih(ih)) {
152 * UFS unlink semantics are such that you
153 * can only delete one directory entry at
158 * when we cut a directory tb->insert_size[0]
159 * means number of entries to be cut (always 1)
161 tb->insert_size[0] = -1;
162 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
163 -tb->insert_size[0]);
165 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
166 "PAP-12030: can not change delimiting key. CFL[0]=%p",
169 if (!item_pos && !pos_in_item && tb->CFL[0]) {
170 replace_key(tb, tb->CFL[0], tb->lkey[0],
174 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
175 -tb->insert_size[0]);
177 RFALSE(!ih_item_len(ih),
178 "PAP-12035: cut must leave non-zero dynamic length of item");
184 print_cur_tb("12040");
185 reiserfs_panic(tb->tb_sb, "PAP-12040",
186 "unexpected mode: %s(%d)",
188 M_PASTE) ? "PASTE" : ((flag ==
189 M_INSERT) ? "INSERT" :
194 * the rule is that no shifting occurs unless by shifting
195 * a node can be freed
/* Delete/cut done; now merge or shift S[0] with neighbors if possible. */
197 n = B_NR_ITEMS(tbS0);
198 /* L[0] takes part in balancing */
200 /* L[0] must be joined with S[0] */
201 if (tb->lnum[0] == -1) {
202 /* R[0] must be also joined with S[0] */
203 if (tb->rnum[0] == -1) {
204 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
206 * all contents of all the 3 buffers
209 if (PATH_H_POSITION(tb->tb_path, 1) == 0
210 && 1 < B_NR_ITEMS(tb->FR[0]))
211 replace_key(tb, tb->CFL[0],
215 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
217 leaf_move_items(LEAF_FROM_R_TO_L, tb,
218 B_NR_ITEMS(tb->R[0]),
221 reiserfs_invalidate_buffer(tb, tbS0);
222 reiserfs_invalidate_buffer(tb,
228 * all contents of all the 3 buffers will
231 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
233 leaf_move_items(LEAF_FROM_L_TO_R, tb,
234 B_NR_ITEMS(tb->L[0]), -1, NULL);
236 /* right_delimiting_key is correct in R[0] */
237 replace_key(tb, tb->CFR[0], tb->rkey[0],
240 reiserfs_invalidate_buffer(tb, tbS0);
241 reiserfs_invalidate_buffer(tb, tb->L[0]);
246 RFALSE(tb->rnum[0] != 0,
247 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
248 /* all contents of L[0] and S[0] will be in L[0] */
249 leaf_shift_left(tb, n, -1);
251 reiserfs_invalidate_buffer(tb, tbS0);
257 * a part of contents of S[0] will be in L[0] and the
258 * rest part of S[0] will be in R[0]
261 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
262 (tb->lnum[0] + tb->rnum[0] > n + 1),
263 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
264 tb->rnum[0], tb->lnum[0], n);
265 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
266 (tb->lbytes != -1 || tb->rbytes != -1),
267 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
268 tb->rbytes, tb->lbytes);
269 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
270 (tb->lbytes < 1 || tb->rbytes != -1),
271 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
272 tb->rbytes, tb->lbytes);
274 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
275 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
277 reiserfs_invalidate_buffer(tb, tbS0);
/* L[0] not involved: R[0] alone may absorb all of S[0]. */
282 if (tb->rnum[0] == -1) {
283 /* all contents of R[0] and S[0] will be in R[0] */
284 leaf_shift_right(tb, n, -1);
285 reiserfs_invalidate_buffer(tb, tbS0);
290 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
/*
 * Balance the leaf S[0] for insert (M_INSERT) or paste (M_PASTE); delete
 * and cut are dispatched to balance_leaf_when_delete().  Work proceeds in
 * phases: shift into L[0], shift into R[0], fill new nodes S_new[i], then
 * handle whatever part of the affected item remains in S[0].
 * NOTE(review): this extract is missing many lines from the original file
 * (braces, arguments, whole statements) — treat as a partial listing, not
 * compilable code.
 */
294 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
295 const char *body, /* body of inserted item or bytes to paste */
296 int flag, /* i - insert, d - delete, c - cut, p - paste
297 (see comment to do_balance) */
298 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
299 must be inserted into the next higher level. This insertion
300 consists of a key or two keys and their corresponding
302 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
305 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
306 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
307 of the affected item */
308 struct buffer_info bi;
309 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
310 int snum[2]; /* number of items that will be placed
311 into S_new (includes partially shifted
313 int sbytes[2]; /* if an item is partially shifted into S_new then
314 if it is a directory item
315 it is the number of entries from the item that are shifted into S_new
317 it is the number of bytes from the item that are shifted into S_new
324 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
326 /* Make balance in case insert_size[0] < 0 */
327 if (tb->insert_size[0] < 0)
328 return balance_leaf_when_delete(tb, flag);
331 if (flag == M_INSERT && !body)
332 zeros_num = ih_item_len(ih);
334 pos_in_item = tb->tb_path->pos_in_item;
335 /* for indirect item pos_in_item is measured in unformatted node
336 pointers. Recalculate to bytes */
338 && is_indirect_le_ih(item_head(tbS0, item_pos)))
339 pos_in_item *= UNFM_P_SIZE;
/* Phase 1: shift tb->lnum[0] items into the left neighbor L[0]. */
341 if (tb->lnum[0] > 0) {
342 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
343 if (item_pos < tb->lnum[0]) {
344 /* new item or it part falls to L[0], shift it too */
345 n = B_NR_ITEMS(tb->L[0]);
348 case M_INSERT: /* insert item into L[0] */
350 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
351 /* part of new item falls into L[0] */
355 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
357 /* Calculate item length to insert to S[0] */
358 new_item_len = ih_item_len(ih) - tb->lbytes;
359 /* Calculate and check item length to insert to L[0] */
360 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
366 /* Insert new item into L[0] */
367 buffer_info_init_left(tb, &bi);
368 leaf_insert_into_buf(&bi,
369 n + item_pos - ret_val, ih, body,
370 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
372 version = ih_version(ih);
374 /* Calculate key component, item length and body to insert into S[0] */
375 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
376 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
378 put_ih_item_len(ih, new_item_len);
379 if (tb->lbytes > zeros_num) {
380 body += (tb->lbytes - zeros_num);
383 zeros_num -= tb->lbytes;
385 RFALSE(ih_item_len(ih) <= 0,
386 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
389 /* new item in whole falls into L[0] */
390 /* Shift lnum[0]-1 items to L[0] */
391 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
392 /* Insert new item into L[0] */
393 buffer_info_init_left(tb, &bi);
394 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
395 tb->insert_size[0] = 0;
400 case M_PASTE: /* append item in L[0] */
402 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
403 /* we must shift the part of the appended item */
404 if (is_direntry_le_ih(item_head(tbS0, item_pos))) {
407 "PAP-12090: invalid parameter in case of a directory");
409 if (tb->lbytes > pos_in_item) {
410 /* new directory entry falls into L[0] */
411 struct item_head *pasted;
412 int l_pos_in_item = pos_in_item;
414 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
415 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
416 if (ret_val && !item_pos) {
417 pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
418 l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1);
421 /* Append given directory entry to directory item */
422 buffer_info_init_left(tb, &bi);
423 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
425 /* previous string prepared space for pasting new entry, following string pastes this entry */
427 /* when we have merge directory item, pos_in_item has been changed too */
429 /* paste new directory entry. 1 is entry number */
430 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
431 1, (struct reiserfs_de_head *) body,
432 body + DEH_SIZE, tb->insert_size[0]);
433 tb->insert_size[0] = 0;
435 /* new directory item doesn't fall into L[0] */
436 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
437 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
439 /* Calculate new position to append in item body */
440 pos_in_item -= tb->lbytes;
443 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
444 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)),
445 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
446 ih_item_len(item_head(tbS0, item_pos)),pos_in_item);
448 if (tb->lbytes >= pos_in_item) {
449 /* appended item will be in L[0] in whole */
452 /* this bytes number must be appended to the last item of L[h] */
453 l_n = tb->lbytes - pos_in_item;
455 /* Calculate new insert_size[0] */
456 tb->insert_size[0] -= l_n;
458 RFALSE(tb->insert_size[0] <= 0,
459 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
461 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
462 (item_head(tbS0, item_pos)));
463 /* Append to body of item in L[0] */
464 buffer_info_init_left(tb, &bi);
466 (&bi, n + item_pos - ret_val, ih_item_len
467 (item_head(tb->L[0], n + item_pos - ret_val)),
469 zeros_num > l_n ? l_n : zeros_num);
470 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
475 RFALSE(ih_item_len(item_head(tbS0, 0)),
476 "PAP-12106: item length must be 0");
477 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key
478 (tb->L[0], n + item_pos - ret_val)),
479 "PAP-12107: items must be of the same file");
480 if (is_indirect_le_ih(item_head(tb->L[0], n + item_pos - ret_val))) {
481 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
483 /* update key of first item in S0 */
484 version = ih_version(item_head(tbS0, 0));
485 set_le_key_k_offset(version, leaf_key(tbS0, 0),
486 le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l);
487 /* update left delimiting key */
488 set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]),
489 le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l);
492 /* Calculate new body, position in item and insert_size[0] */
493 if (l_n > zeros_num) {
494 body += (l_n - zeros_num);
500 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
501 || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)
502 || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
503 "PAP-12120: item must be merge-able with left neighboring item");
504 } else { /* only part of the appended item will be in L[0] */
506 /* Calculate position in item for append in S[0] */
507 pos_in_item -= tb->lbytes;
509 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
511 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
512 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
515 } else { /* appended item will be in L[0] in whole */
517 struct item_head *pasted;
519 if (!item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
520 /* then increment pos_in_item by the size of the last item in L[0] */
521 pasted = item_head(tb->L[0], n - 1);
522 if (is_direntry_le_ih(pasted))
523 pos_in_item += ih_entry_count(pasted);
525 pos_in_item += ih_item_len(pasted);
528 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
529 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
530 /* Append to body of item in L[0] */
531 buffer_info_init_left(tb, &bi);
532 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
537 /* if appended item is directory, paste entry */
538 pasted = item_head(tb->L[0], n + item_pos - ret_val);
539 if (is_direntry_le_ih(pasted))
540 leaf_paste_entries(&bi, n + item_pos - ret_val,
542 (struct reiserfs_de_head *) body,
545 /* if appended item is indirect item, put unformatted node into un list */
546 if (is_indirect_le_ih(pasted))
547 set_ih_free_space(pasted, 0);
548 tb->insert_size[0] = 0;
552 default: /* cases d and t */
553 reiserfs_panic(tb->tb_sb, "PAP-12130",
554 "lnum > 0: unexpected mode: "
556 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
559 /* new item doesn't fall into L[0] */
560 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
564 /* tb->lnum[0] > 0 */
565 /* Calculate new item position */
566 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
/* Phase 2: shift tb->rnum[0] items into the right neighbor R[0]. */
568 if (tb->rnum[0] > 0) {
569 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
570 n = B_NR_ITEMS(tbS0);
573 case M_INSERT: /* insert item */
574 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
575 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
576 loff_t old_key_comp, old_len, r_zeros_number;
581 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
583 version = ih_version(ih);
584 /* Remember key component and item length */
585 old_key_comp = le_ih_k_offset(ih);
586 old_len = ih_item_len(ih);
588 /* Calculate key component and item length to insert into R[0] */
589 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
590 set_le_ih_k_offset(ih, offset);
591 put_ih_item_len(ih, tb->rbytes);
592 /* Insert part of the item into R[0] */
593 buffer_info_init_right(tb, &bi);
594 if ((old_len - tb->rbytes) > zeros_num) {
596 r_body = body + (old_len - tb->rbytes) - zeros_num;
599 r_zeros_number = zeros_num - (old_len - tb->rbytes);
600 zeros_num -= r_zeros_number;
603 leaf_insert_into_buf(&bi, 0, ih, r_body,
606 /* Replace right delimiting key by first key in R[0] */
607 replace_key(tb, tb->CFR[0], tb->rkey[0],
610 /* Calculate key component and item length to insert into S[0] */
611 set_le_ih_k_offset(ih, old_key_comp);
612 put_ih_item_len(ih, old_len - tb->rbytes);
614 tb->insert_size[0] -= tb->rbytes;
616 } else { /* whole new item falls into R[0] */
618 /* Shift rnum[0]-1 items to R[0] */
619 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
620 /* Insert new item into R[0] */
621 buffer_info_init_right(tb, &bi);
622 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
623 ih, body, zeros_num);
625 if (item_pos - n + tb->rnum[0] - 1 == 0) {
626 replace_key(tb, tb->CFR[0],
631 zeros_num = tb->insert_size[0] = 0;
633 } else { /* new item or part of it doesn't fall into R[0] */
635 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
639 case M_PASTE: /* append item */
641 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
642 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
643 if (is_direntry_le_ih(item_head(tbS0, item_pos))) { /* we append to directory item */
647 "PAP-12145: invalid parameter in case of a directory");
648 entry_count = ih_entry_count(item_head
650 if (entry_count - tb->rbytes <
652 /* new directory entry falls into R[0] */
654 int paste_entry_position;
656 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
657 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
658 tb->rbytes, entry_count);
659 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
660 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
661 /* Paste given directory entry to directory item */
662 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
663 buffer_info_init_right(tb, &bi);
664 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
666 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
667 (struct reiserfs_de_head *) body,
668 body + DEH_SIZE, tb->insert_size[0]);
670 if (paste_entry_position == 0) {
671 /* change delimiting keys */
672 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
675 tb->insert_size[0] = 0;
677 } else { /* new directory entry doesn't fall into R[0] */
679 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
681 } else { /* regular object */
683 int n_shift, n_rem, r_zeros_number;
686 /* Calculate number of bytes which must be shifted from appended item */
687 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
690 RFALSE(pos_in_item != ih_item_len
691 (item_head(tbS0, item_pos)),
692 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
693 pos_in_item, ih_item_len
694 (item_head(tbS0, item_pos)));
696 leaf_shift_right(tb, tb->rnum[0], n_shift);
697 /* Calculate number of bytes which must remain in body after appending to R[0] */
698 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
703 unsigned long temp_rem = n_rem;
705 version = ih_version(item_head(tb->R[0], 0));
706 if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
707 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
709 set_le_key_k_offset(version, leaf_key(tb->R[0], 0),
710 le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem);
711 set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
712 le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem);
714 /* k_offset (leaf_key(tb->R[0],0)) += n_rem;
715 k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
716 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
718 /* Append part of body into R[0] */
719 buffer_info_init_right(tb, &bi);
720 if (n_rem > zeros_num) {
722 r_body = body + n_rem - zeros_num;
725 r_zeros_number = zeros_num - n_rem;
726 zeros_num -= r_zeros_number;
729 leaf_paste_in_buffer(&bi, 0, n_shift,
730 tb->insert_size[0] - n_rem,
731 r_body, r_zeros_number);
733 if (is_indirect_le_ih(item_head(tb->R[0], 0))) {
736 "PAP-12160: paste more than one unformatted node pointer");
738 set_ih_free_space(item_head(tb->R[0], 0), 0);
740 tb->insert_size[0] = n_rem;
744 } else { /* pasted item in whole falls into R[0] */
746 struct item_head *pasted;
748 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
749 /* append item in R[0] */
750 if (pos_in_item >= 0) {
751 buffer_info_init_right(tb, &bi);
752 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
753 tb->insert_size[0], body, zeros_num);
756 /* paste new entry, if item is directory item */
757 pasted = item_head(tb->R[0], item_pos - n + tb->rnum[0]);
758 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
759 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
761 (struct reiserfs_de_head *) body,
762 body + DEH_SIZE, tb->insert_size[0]);
765 RFALSE(item_pos - n + tb->rnum[0],
766 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
768 /* update delimiting keys */
769 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
773 if (is_indirect_le_ih(pasted))
774 set_ih_free_space(pasted, 0);
775 zeros_num = tb->insert_size[0] = 0;
777 } else { /* new item doesn't fall into R[0] */
779 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
782 default: /* cases d and t */
783 reiserfs_panic(tb->tb_sb, "PAP-12175",
784 "rnum > 0: unexpected mode: %s(%d)",
785 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
790 /* tb->rnum[0] > 0 */
791 RFALSE(tb->blknum[0] > 3,
792 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
793 RFALSE(tb->blknum[0] < 0,
794 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
796 /* if while adding to a node we discover that it is possible to split
797 it in two, and merge the left part into the left neighbor and the
798 right part into the right neighbor, eliminating the node */
799 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
801 RFALSE(!tb->lnum[0] || !tb->rnum[0],
802 "PAP-12190: lnum and rnum must not be zero");
803 /* if insertion was done before 0-th position in R[0], right
804 delimiting key of the tb->L[0]'s and left delimiting key are
808 reiserfs_panic(tb->tb_sb, "vs-12195",
809 "CFR not initialized");
810 copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
811 internal_key(tb->CFR[0], tb->rkey[0]));
812 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
815 reiserfs_invalidate_buffer(tb, tbS0);
819 /* Fill new nodes that appear in place of S[0] */
821 /* I am told that this copying is because we need an array to enable
822 the looping code. -Hans */
823 snum[0] = tb->s1num, snum[1] = tb->s2num;
824 sbytes[0] = tb->s1bytes;
825 sbytes[1] = tb->s2bytes;
/* Phase 3: distribute items into the freshly allocated S_new nodes. */
826 for (i = tb->blknum[0] - 2; i >= 0; i--) {
828 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
831 /* here we shift from S to S_new nodes */
833 S_new[i] = get_FEB(tb);
835 /* initialized block type and tree level */
836 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
838 n = B_NR_ITEMS(tbS0);
841 case M_INSERT: /* insert item */
843 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
844 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
845 int old_key_comp, old_len, r_zeros_number;
849 /* Move snum[i]-1 items from S[0] to S_new[i] */
850 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
853 /* Remember key component and item length */
854 version = ih_version(ih);
855 old_key_comp = le_ih_k_offset(ih);
856 old_len = ih_item_len(ih);
858 /* Calculate key component and item length to insert into S_new[i] */
859 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
860 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
862 put_ih_item_len(ih, sbytes[i]);
864 /* Insert part of the item into S_new[i] before 0-th item */
865 buffer_info_init_bh(tb, &bi, S_new[i]);
867 if ((old_len - sbytes[i]) > zeros_num) {
869 r_body = body + (old_len - sbytes[i]) - zeros_num;
872 r_zeros_number = zeros_num - (old_len - sbytes[i]);
873 zeros_num -= r_zeros_number;
876 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
878 /* Calculate key component and item length to insert into S[i] */
879 set_le_ih_k_offset(ih, old_key_comp);
880 put_ih_item_len(ih, old_len - sbytes[i]);
881 tb->insert_size[0] -= sbytes[i];
882 } else { /* whole new item falls into S_new[i] */
884 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
885 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
886 snum[i] - 1, sbytes[i], S_new[i]);
888 /* Insert new item into S_new[i] */
889 buffer_info_init_bh(tb, &bi, S_new[i]);
890 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
891 ih, body, zeros_num);
893 zeros_num = tb->insert_size[0] = 0;
897 else { /* new item or it part don't falls into S_new[i] */
899 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
900 snum[i], sbytes[i], S_new[i]);
904 case M_PASTE: /* append item */
906 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
907 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
908 struct item_head *aux_ih;
910 RFALSE(ih, "PAP-12210: ih must be 0");
912 aux_ih = item_head(tbS0, item_pos);
913 if (is_direntry_le_ih(aux_ih)) {
914 /* we append to directory item */
918 entry_count = ih_entry_count(aux_ih);
920 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
921 /* new directory entry falls into S_new[i] */
923 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
924 RFALSE(sbytes[i] - 1 >= entry_count,
925 "PAP-12220: there are no so much entries (%d), only %d",
926 sbytes[i] - 1, entry_count);
928 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
929 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
930 /* Paste given directory entry to directory item */
931 buffer_info_init_bh(tb, &bi, S_new[i]);
932 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933 tb->insert_size[0], body, zeros_num);
934 /* paste new directory entry */
935 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
936 (struct reiserfs_de_head *) body,
937 body + DEH_SIZE, tb->insert_size[0]);
938 tb->insert_size[0] = 0;
940 } else { /* new directory entry doesn't fall into S_new[i] */
941 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
943 } else { /* regular object */
945 int n_shift, n_rem, r_zeros_number;
948 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)) || tb->insert_size[0] <= 0,
949 "PAP-12225: item too short or insert_size <= 0");
951 /* Calculate number of bytes which must be shifted from appended item */
952 n_shift = sbytes[i] - tb->insert_size[0];
955 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
957 /* Calculate number of bytes which must remain in body after append to S_new[i] */
958 n_rem = tb->insert_size[0] - sbytes[i];
961 /* Append part of body into S_new[0] */
962 buffer_info_init_bh(tb, &bi, S_new[i]);
963 if (n_rem > zeros_num) {
965 r_body = body + n_rem - zeros_num;
968 r_zeros_number = zeros_num - n_rem;
969 zeros_num -= r_zeros_number;
972 leaf_paste_in_buffer(&bi, 0, n_shift,
973 tb->insert_size[0] - n_rem,
974 r_body, r_zeros_number);
976 struct item_head *tmp;
978 tmp = item_head(S_new[i], 0);
979 if (is_indirect_le_ih
981 set_ih_free_space(tmp, 0);
982 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
984 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
988 tb->insert_size[0] = n_rem;
993 /* item falls wholly into S_new[i] */
996 struct item_head *pasted;
998 #ifdef CONFIG_REISERFS_CHECK
999 struct item_head *ih_check = item_head(tbS0, item_pos);
1001 if (!is_direntry_le_ih(ih_check)
1002 && (pos_in_item != ih_item_len(ih_check)
1003 || tb->insert_size[0] <= 0))
1004 reiserfs_panic(tb->tb_sb,
1009 #endif /* CONFIG_REISERFS_CHECK */
1011 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
1017 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1020 /* paste into item */
1021 buffer_info_init_bh(tb, &bi, S_new[i]);
1022 leaf_paste_in_buffer(&bi,
1023 item_pos - n + snum[i],
1028 pasted = item_head(S_new[i], item_pos - n + snum[i]);
1029 if (is_direntry_le_ih(pasted)) {
1030 leaf_paste_entries(&bi,
1031 item_pos - n + snum[i],
1033 (struct reiserfs_de_head *)body,
1039 /* if we paste to indirect item update ih_free_space */
1040 if (is_indirect_le_ih(pasted))
1041 set_ih_free_space(pasted, 0);
1042 zeros_num = tb->insert_size[0] = 0;
1046 else { /* pasted item doesn't fall into S_new[i] */
1048 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1049 snum[i], sbytes[i], S_new[i]);
1052 default: /* cases d and t */
1053 reiserfs_panic(tb->tb_sb, "PAP-12245",
1054 "blknum > 2: unexpected mode: %s(%d)",
1055 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
/* Record the new node's first key and pointer for the next tree level. */
1058 memcpy(insert_key + i, leaf_key(S_new[i], 0), KEY_SIZE);
1059 insert_ptr[i] = S_new[i];
1061 RFALSE(!buffer_journaled(S_new[i])
1062 || buffer_journal_dirty(S_new[i])
1063 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1067 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1068 affected item which remains in S */
/* Phase 4: insert/paste whatever part of the affected item stayed in S[0]. */
1069 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1072 case M_INSERT: /* insert item into S[0] */
1073 buffer_info_init_tbS0(tb, &bi);
1074 leaf_insert_into_buf(&bi, item_pos, ih, body,
1077 /* If we insert the first key change the delimiting key */
1078 if (item_pos == 0) {
1079 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1080 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1084 case M_PASTE:{ /* append item in S[0] */
1085 struct item_head *pasted;
1087 pasted = item_head(tbS0, item_pos);
1088 /* when directory, may be new entry already pasted */
1089 if (is_direntry_le_ih(pasted)) {
1090 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1092 RFALSE(!tb->insert_size[0],
1093 "PAP-12260: insert_size is 0 already");
1096 buffer_info_init_tbS0(tb, &bi);
1097 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1098 tb->insert_size[0], body,
1102 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1103 (struct reiserfs_de_head *)body,
1105 tb->insert_size[0]);
1106 if (!item_pos && !pos_in_item) {
1107 RFALSE(!tb->CFL[0] || !tb->L[0],
1108 "PAP-12270: CFL[0]/L[0] must be specified");
1110 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1112 tb->insert_size[0] = 0;
1114 } else { /* regular object */
1115 if (pos_in_item == ih_item_len(pasted)) {
1117 RFALSE(tb->insert_size[0] <= 0,
1118 "PAP-12275: insert size must not be %d",
1119 tb->insert_size[0]);
1120 buffer_info_init_tbS0(tb, &bi);
1121 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1122 tb->insert_size[0], body, zeros_num);
1124 if (is_indirect_le_ih(pasted)) {
1129 "PAP-12280: insert_size for indirect item must be %d, not %d",
1134 set_ih_free_space(pasted, 0);
1136 tb->insert_size[0] = 0;
1138 #ifdef CONFIG_REISERFS_CHECK
1140 if (tb->insert_size[0]) {
1141 print_cur_tb("12285");
1142 reiserfs_panic(tb->tb_sb,
1147 tb->insert_size[0]);
1150 #endif /* CONFIG_REISERFS_CHECK */
1153 } /* case M_PASTE: */
1156 #ifdef CONFIG_REISERFS_CHECK
1157 if (flag == M_PASTE && tb->insert_size[0]) {
1158 print_cur_tb("12290");
1159 reiserfs_panic(tb->tb_sb,
1160 "PAP-12290", "insert_size is still not 0 (%d)",
1161 tb->insert_size[0]);
1163 #endif /* CONFIG_REISERFS_CHECK */
1165 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1167 /* Make empty node */
1168 void make_empty_node(struct buffer_info *bi)
1170 struct block_head *blkh;
1172 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1174 blkh = B_BLK_HEAD(bi->bi_bh);
1175 set_blkh_nr_item(blkh, 0);
1176 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1179 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1182 /* Get first empty buffer */
1183 struct buffer_head *get_FEB(struct tree_balance *tb)
1186 struct buffer_info bi;
1188 for (i = 0; i < MAX_FEB_SIZE; i++)
1189 if (tb->FEB[i] != NULL)
1192 if (i == MAX_FEB_SIZE)
1193 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1195 buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1196 make_empty_node(&bi);
1197 set_buffer_uptodate(tb->FEB[i]);
1198 tb->used[i] = tb->FEB[i];
1204 /* This is now used because reiserfs_free_block has to be able to schedule. */
1205 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1209 if (buffer_dirty(bh))
1210 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1211 "called with dirty buffer");
1212 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1213 if (!tb->thrown[i]) {
1215 get_bh(bh); /* free_thrown puts this */
1218 reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1219 "too many thrown buffers");
1222 static void free_thrown(struct tree_balance *tb)
1225 b_blocknr_t blocknr;
1226 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1227 if (tb->thrown[i]) {
1228 blocknr = tb->thrown[i]->b_blocknr;
1229 if (buffer_dirty(tb->thrown[i]))
1230 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1231 "called with dirty buffer %d",
1233 brelse(tb->thrown[i]); /* incremented in store_thrown */
1234 reiserfs_free_block(tb->transaction_handle, NULL,
1240 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1242 struct block_head *blkh;
1243 blkh = B_BLK_HEAD(bh);
1244 set_blkh_level(blkh, FREE_LEVEL);
1245 set_blkh_nr_item(blkh, 0);
1247 clear_buffer_dirty(bh);
1248 store_thrown(tb, bh);
1251 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1252 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1253 struct buffer_head *src, int n_src)
1256 RFALSE(dest == NULL || src == NULL,
1257 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1259 RFALSE(!B_IS_KEYS_LEVEL(dest),
1260 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1262 RFALSE(n_dest < 0 || n_src < 0,
1263 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1264 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1265 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1266 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1268 if (B_IS_ITEMS_LEVEL(src))
1269 /* source buffer contains leaf node */
1270 memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1273 memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1276 do_balance_mark_internal_dirty(tb, dest, 0);
1279 int get_left_neighbor_position(struct tree_balance *tb, int h)
1281 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1283 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1284 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1285 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1287 if (Sh_position == 0)
1288 return B_NR_ITEMS(tb->FL[h]);
1290 return Sh_position - 1;
1293 int get_right_neighbor_position(struct tree_balance *tb, int h)
1295 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1297 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1298 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1299 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1301 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1304 return Sh_position + 1;
1307 #ifdef CONFIG_REISERFS_CHECK
1309 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1310 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1313 struct disk_child *dc;
1316 RFALSE(!bh, "PAP-12336: bh == 0");
1318 if (!bh || !B_IS_IN_TREE(bh))
1321 RFALSE(!buffer_dirty(bh) &&
1322 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1323 "PAP-12337: buffer (%b) must be dirty", bh);
1324 dc = B_N_CHILD(bh, 0);
1326 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1327 if (!is_reusable(s, dc_block_number(dc), 1)) {
1329 reiserfs_panic(s, "PAP-12338",
1330 "invalid child pointer %y in %b",
1336 static int locked_or_not_in_tree(struct tree_balance *tb,
1337 struct buffer_head *bh, char *which)
1339 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1340 !B_IS_IN_TREE(bh)) {
1341 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
/*
 * Pre-balance sanity pass: panic if another tree_balance is already
 * active on this super block, then verify that each buffer we are
 * about to modify is unlocked (or journal-prepared) and still linked
 * into the tree, OR-ing failures into retval.
 * NOTE(review): this excerpt is missing several lines of the function
 * (the retval declaration, the lnum/rnum neighbor guards, the final
 * return) -- confirm against the full source before relying on it.
 */
1347 static int check_before_balancing(struct tree_balance *tb)
/* concurrent balancing on one mount would corrupt shared state */
1351 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1352 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1353 "occurred based on cur_tb not being null at "
1354 "this point in code. do_balance cannot properly "
1355 "handle concurrent tree accesses on a same "
1360 * double check that buffers that we will modify are unlocked.
1361 * (fix_nodes should already have prepped all of these for us).
/* left neighbor and its fathers */
1364 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1365 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1366 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1367 check_leaf(tb->L[0]);
/* right neighbor and its fathers */
1370 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1371 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1372 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1373 check_leaf(tb->R[0]);
/* and the leaf being balanced itself */
1375 retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1377 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
/*
 * Post-balance accounting check: for each leaf touched by balancing,
 * the free space recorded in the leaf must equal its maximum child
 * size minus the child size (dc_size) recorded in its parent.  Any
 * mismatch means items were shifted incorrectly -> print state, panic.
 * NOTE(review): several lines are missing from this excerpt (the
 * lnum/rnum guards and the dc_size calls in the first two checks).
 */
1382 static void check_after_balance_leaf(struct tree_balance *tb)
/* left neighbor: free space must match FL[0]'s bookkeeping */
1385 if (B_FREE_SPACE(tb->L[0]) !=
1386 MAX_CHILD_SIZE(tb->L[0]) -
1388 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1389 print_cur_tb("12221");
1390 reiserfs_panic(tb->tb_sb, "PAP-12355",
1391 "shift to left was incorrect");
/* right neighbor: free space must match FR[0]'s bookkeeping */
1395 if (B_FREE_SPACE(tb->R[0]) !=
1396 MAX_CHILD_SIZE(tb->R[0]) -
1398 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1399 print_cur_tb("12222");
1400 reiserfs_panic(tb->tb_sb, "PAP-12360",
1401 "shift to right was incorrect");
/* S[0] itself, checked against its parent on the path (if any) */
1404 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1405 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1406 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1407 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1408 PATH_H_POSITION(tb->tb_path, 1)))))) {
/* recompute both sides so the warning can show the discrepancy */
1409 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1410 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1411 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1412 PATH_H_POSITION(tb->tb_path,
1414 print_cur_tb("12223");
1415 reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1416 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1417 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1419 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1420 PATH_H_PBUFFER(tb->tb_path, 1),
1421 PATH_H_POSITION(tb->tb_path, 1),
1423 (PATH_H_PBUFFER(tb->tb_path, 1),
1424 PATH_H_POSITION(tb->tb_path, 1))),
1426 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1430 static void check_leaf_level(struct tree_balance *tb)
1432 check_leaf(tb->L[0]);
1433 check_leaf(tb->R[0]);
1434 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1437 static void check_internal_levels(struct tree_balance *tb)
1441 /* check all internal nodes */
1442 for (h = 1; tb->insert_size[h]; h++) {
1443 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1444 "BAD BUFFER ON PATH");
1446 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1448 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1456 * Now we have all of the buffers that must be used in balancing of
1457 * the tree. We rely on the assumption that schedule() will not occur
1458 * while do_balance works. ( Only interrupt handlers are acceptable.)
1459 * We balance the tree according to the analysis made before this,
1460 * using buffers already obtained. For SMP support it will someday be
1461 * necessary to add ordered locking of tb.
1465 * Some interesting rules of balancing:
1466 * we delete a maximum of two nodes per level per balancing: we never
1467 * delete R, when we delete two of the three nodes L, S, R we move
1468 * them into L.
1470 * we only delete L if we are deleting two nodes; if we delete only
1471 * one node we delete S.
1473 * if we shift leaves then we shift as much as we can: this is a
1474 * deliberate policy of extremism in node packing which results in
1475 * higher average utilization after repeated random balance operations
1476 * at the cost of more memory copies and more balancing as a result of
1477 * small insertions to full nodes.
1479 * if we shift internal nodes we try to evenly balance the node
1480 * utilization, with consequent less balancing at the cost of lower
1481 * utilization.
1483 * one could argue that the policy for directories in leaves should be
1484 * that of internal nodes, but we will wait until another day to
1485 * evaluate this.... It would be nice to someday measure and prove
1486 * these assumptions as to what is optimal....
/*
 * Entry hook for do_balance(): assert that no buffer we will modify is
 * locked or detached, and (under CONFIG_REISERFS_CHECK) record this
 * tree_balance as the one active on the super block.  The print_tb
 * call must stay commented out: 'flag' is not in scope here, so the
 * call cannot compile as live code.
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
	   tb->tb_path->pos_in_item, tb, "check"); */
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}
1506 static inline void do_balance_completed(struct tree_balance *tb)
1509 #ifdef CONFIG_REISERFS_CHECK
1510 check_leaf_level(tb);
1511 check_internal_levels(tb);
1512 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1516 * reiserfs_free_block is no longer schedule safe. So, we need to
1517 * put the buffers we want freed on the thrown list during do_balance,
1518 * and then free them now
1521 REISERFS_SB(tb->tb_sb)->s_do_balance++;
1523 /* release all nodes hold to perform the balancing */
1530 * do_balance - balance the tree
1532 * @tb: tree_balance structure
1533 * @ih: item header of inserted item
1534 * @body: body of inserted item or bytes to paste
1535 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1537 * Cut means delete part of an item (includes removing an entry from a
1540 * Delete means delete whole item.
1542 * Insert means add a new item into the tree.
1544 * Paste means to append to the end of an existing file or to
1545 * insert a directory entry.
/*
 * Balance the tree: handle the leaf level first, then propagate any
 * resulting key/pointer insertions up the internal levels.
 * NOTE(review): this excerpt is missing a few lines of the function
 * (tb->tb_mode assignment, the unfix_nodes/return in the no-work
 * branch, closing braces) -- confirm against the full source.
 */
1547 void do_balance(struct tree_balance *tb, struct item_head *ih,
1548 const char *body, int flag)
1550 int child_pos; /* position of a child node in its parent */
1551 int h; /* level of the tree being processed */
1554 * in our processing of one level we sometimes determine what
1555 * must be inserted into the next higher level. This insertion
1556 * consists of a key or two keys and their corresponding
1559 struct item_head insert_key[2];
1561 /* inserted node-ptrs for the next level */
1562 struct buffer_head *insert_ptr[2];
/* reset the dirty bookkeeping before this balance run */
1565 tb->need_balance_dirty = 0;
/* the tree must not have changed since fix_nodes() analyzed it */
1567 if (FILESYSTEM_CHANGED_TB(tb)) {
1568 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1571 /* if we have no real work to do */
1572 if (!tb->insert_size[0]) {
1573 reiserfs_warning(tb->tb_sb, "PAP-12350",
1574 "insert_size == 0, mode == %c", flag);
/* bump the generation so FILESYSTEM_CHANGED_TB observers see a change */
1579 atomic_inc(&(fs_generation(tb->tb_sb)));
1580 do_balance_starts(tb);
1583 * balance_leaf returns 0 except if combining L R and S into
1584 * one node. see balance_internal() for explanation of this
1587 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1588 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1590 #ifdef CONFIG_REISERFS_CHECK
1591 check_after_balance_leaf(tb);
1594 /* Balance internal level of the tree. */
1595 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1597 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1599 do_balance_completed(tb);