399b2009b677d37a9aae18fbf40e6aabd4dc0d2b
[cascardo/linux.git] / fs / reiserfs / do_balan.c
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4
5 /*
6  * Now we have all buffers that must be used in balancing of the tree
7  * Further calculations can not cause schedule(), and thus the buffer
8  * tree will be stable until the balancing will be finished
9  * balance the tree according to the analysis made before,
10  * and using buffers obtained after all above.
11  */
12
13 #include <asm/uaccess.h>
14 #include <linux/time.h>
15 #include "reiserfs.h"
16 #include <linux/buffer_head.h>
17 #include <linux/kernel.h>
18
19 static inline void buffer_info_init_left(struct tree_balance *tb,
20                                          struct buffer_info *bi)
21 {
22         bi->tb          = tb;
23         bi->bi_bh       = tb->L[0];
24         bi->bi_parent   = tb->FL[0];
25         bi->bi_position = get_left_neighbor_position(tb, 0);
26 }
27
28 static inline void buffer_info_init_right(struct tree_balance *tb,
29                                           struct buffer_info *bi)
30 {
31         bi->tb          = tb;
32         bi->bi_bh       = tb->R[0];
33         bi->bi_parent   = tb->FR[0];
34         bi->bi_position = get_right_neighbor_position(tb, 0);
35 }
36
37 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
38                                          struct buffer_info *bi)
39 {
40         bi->tb          = tb;
41         bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
42         bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
43         bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
44 }
45
46 static inline void buffer_info_init_bh(struct tree_balance *tb,
47                                        struct buffer_info *bi,
48                                        struct buffer_head *bh)
49 {
50         bi->tb          = tb;
51         bi->bi_bh       = bh;
52         bi->bi_parent   = NULL;
53         bi->bi_position = 0;
54 }
55
56 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
57                                        struct buffer_head *bh, int flag)
58 {
59         journal_mark_dirty(tb->transaction_handle,
60                            tb->transaction_handle->t_super, bh);
61 }
62
63 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
64 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
65
66 /*
67  * summary:
68  *  if deleting something ( tb->insert_size[0] < 0 )
69  *    return(balance_leaf_when_delete()); (flag d handled here)
70  *  else
71  *    if lnum is larger than 0 we put items into the left node
72  *    if rnum is larger than 0 we put items into the right node
73  *    if snum1 is larger than 0 we put items into the new node s1
74  *    if snum2 is larger than 0 we put items into the new node s2
75  * Note that all *num* count new items being created.
76  *
77  * It would be easier to read balance_leaf() if each of these summary
78  * lines was a separate procedure rather than being inlined.  I think
79  * that there are many passages here and in balance_leaf_when_delete() in
80  * which two calls to one procedure can replace two passages, and it
81  * might save cache space and improve software maintenance costs to do so.
82  *
83  * Vladimir made the perceptive comment that we should offload most of
84  * the decision making in this function into fix_nodes/check_balance, and
85  * then create some sort of structure in tb that says what actions should
86  * be performed by do_balance.
87  *
88  * -Hans
89  */
90
91 /*
92  * Balance leaf node in case of delete or cut: insert_size[0] < 0
93  *
94  * lnum, rnum can have values >= -1
95  *      -1 means that the neighbor must be joined with S
96  *       0 means that nothing should be done with the neighbor
97  *      >0 means to shift entirely or partly the specified number of items
98  *         to the neighbor
99  */
100 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
101 {
102         struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
103         int item_pos = PATH_LAST_POSITION(tb->tb_path);
104         int pos_in_item = tb->tb_path->pos_in_item;
105         struct buffer_info bi;
106         int n;
107         struct item_head *ih;
108
109         RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
110                "vs- 12000: level: wrong FR %z", tb->FR[0]);
111         RFALSE(tb->blknum[0] > 1,
112                "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
113         RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
114                "PAP-12010: tree can not be empty");
115
116         ih = item_head(tbS0, item_pos);
117         buffer_info_init_tbS0(tb, &bi);
118
119         /* Delete or truncate the item */
120
121         switch (flag) {
122         case M_DELETE:          /* delete item in S[0] */
123
124                 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
125                        "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
126                        -tb->insert_size[0], ih);
127
128                 leaf_delete_items(&bi, 0, item_pos, 1, -1);
129
130                 if (!item_pos && tb->CFL[0]) {
131                         if (B_NR_ITEMS(tbS0)) {
132                                 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
133                                             0);
134                         } else {
135                                 if (!PATH_H_POSITION(tb->tb_path, 1))
136                                         replace_key(tb, tb->CFL[0], tb->lkey[0],
137                                                     PATH_H_PPARENT(tb->tb_path,
138                                                                    0), 0);
139                         }
140                 }
141
142                 RFALSE(!item_pos && !tb->CFL[0],
143                        "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
144                        tb->L[0]);
145
146                 break;
147
148         case M_CUT:{            /* cut item in S[0] */
149                         if (is_direntry_le_ih(ih)) {
150
151                                 /*
152                                  * UFS unlink semantics are such that you
153                                  * can only delete one directory entry at
154                                  * a time.
155                                  */
156
157                                 /*
158                                  * when we cut a directory tb->insert_size[0]
159                                  * means number of entries to be cut (always 1)
160                                  */
161                                 tb->insert_size[0] = -1;
162                                 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
163                                                      -tb->insert_size[0]);
164
165                                 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
166                                        "PAP-12030: can not change delimiting key. CFL[0]=%p",
167                                        tb->CFL[0]);
168
169                                 if (!item_pos && !pos_in_item && tb->CFL[0]) {
170                                         replace_key(tb, tb->CFL[0], tb->lkey[0],
171                                                     tbS0, 0);
172                                 }
173                         } else {
174                                 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
175                                                      -tb->insert_size[0]);
176
177                                 RFALSE(!ih_item_len(ih),
178                                        "PAP-12035: cut must leave non-zero dynamic length of item");
179                         }
180                         break;
181                 }
182
183         default:
184                 print_cur_tb("12040");
185                 reiserfs_panic(tb->tb_sb, "PAP-12040",
186                                "unexpected mode: %s(%d)",
187                                (flag ==
188                                 M_PASTE) ? "PASTE" : ((flag ==
189                                                        M_INSERT) ? "INSERT" :
190                                                       "UNKNOWN"), flag);
191         }
192
193         /*
194          * the rule is that no shifting occurs unless by shifting
195          * a node can be freed
196          */
197         n = B_NR_ITEMS(tbS0);
198         /* L[0] takes part in balancing */
199         if (tb->lnum[0]) {
200                 /* L[0] must be joined with S[0] */
201                 if (tb->lnum[0] == -1) {
202                         /* R[0] must be also joined with S[0] */
203                         if (tb->rnum[0] == -1) {
204                                 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
205                                         /*
206                                          * all contents of all the 3 buffers
207                                          * will be in L[0]
208                                          */
209                                         if (PATH_H_POSITION(tb->tb_path, 1) == 0
210                                             && 1 < B_NR_ITEMS(tb->FR[0]))
211                                                 replace_key(tb, tb->CFL[0],
212                                                             tb->lkey[0],
213                                                             tb->FR[0], 1);
214
215                                         leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
216                                                         -1, NULL);
217                                         leaf_move_items(LEAF_FROM_R_TO_L, tb,
218                                                         B_NR_ITEMS(tb->R[0]),
219                                                         -1, NULL);
220
221                                         reiserfs_invalidate_buffer(tb, tbS0);
222                                         reiserfs_invalidate_buffer(tb,
223                                                                    tb->R[0]);
224
225                                         return 0;
226                                 }
227                                 /*
228                                  * all contents of all the 3 buffers will
229                                  * be in R[0]
230                                  */
231                                 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
232                                                 NULL);
233                                 leaf_move_items(LEAF_FROM_L_TO_R, tb,
234                                                 B_NR_ITEMS(tb->L[0]), -1, NULL);
235
236                                 /* right_delimiting_key is correct in R[0] */
237                                 replace_key(tb, tb->CFR[0], tb->rkey[0],
238                                             tb->R[0], 0);
239
240                                 reiserfs_invalidate_buffer(tb, tbS0);
241                                 reiserfs_invalidate_buffer(tb, tb->L[0]);
242
243                                 return -1;
244                         }
245
246                         RFALSE(tb->rnum[0] != 0,
247                                "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
248                         /* all contents of L[0] and S[0] will be in L[0] */
249                         leaf_shift_left(tb, n, -1);
250
251                         reiserfs_invalidate_buffer(tb, tbS0);
252
253                         return 0;
254                 }
255
256                 /*
257                  * a part of contents of S[0] will be in L[0] and the
258                  * rest part of S[0] will be in R[0]
259                  */
260
261                 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
262                        (tb->lnum[0] + tb->rnum[0] > n + 1),
263                        "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
264                        tb->rnum[0], tb->lnum[0], n);
265                 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
266                        (tb->lbytes != -1 || tb->rbytes != -1),
267                        "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
268                        tb->rbytes, tb->lbytes);
269                 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
270                        (tb->lbytes < 1 || tb->rbytes != -1),
271                        "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
272                        tb->rbytes, tb->lbytes);
273
274                 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
275                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
276
277                 reiserfs_invalidate_buffer(tb, tbS0);
278
279                 return 0;
280         }
281
282         if (tb->rnum[0] == -1) {
283                 /* all contents of R[0] and S[0] will be in R[0] */
284                 leaf_shift_right(tb, n, -1);
285                 reiserfs_invalidate_buffer(tb, tbS0);
286                 return 0;
287         }
288
289         RFALSE(tb->rnum[0],
290                "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
291         return 0;
292 }
293
294 static int balance_leaf(struct tree_balance *tb, struct item_head *ih,  /* item header of inserted item (this is on little endian) */
295                         const char *body,       /* body  of inserted item or bytes to paste */
296                         int flag,       /* i - insert, d - delete, c - cut, p - paste
297                                            (see comment to do_balance) */
298                         struct item_head *insert_key,   /* in our processing of one level we sometimes determine what
299                                                            must be inserted into the next higher level.  This insertion
300                                                            consists of a key or two keys and their corresponding
301                                                            pointers */
302                         struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
303     )
304 {
305         struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
306         int item_pos = PATH_LAST_POSITION(tb->tb_path); /*  index into the array of item headers in S[0]
307                                                            of the affected item */
308         struct buffer_info bi;
309         struct buffer_head *S_new[2];   /* new nodes allocated to hold what could not fit into S */
310         int snum[2];            /* number of items that will be placed
311                                    into S_new (includes partially shifted
312                                    items) */
313         int sbytes[2];          /* if an item is partially shifted into S_new then
314                                    if it is a directory item
315                                    it is the number of entries from the item that are shifted into S_new
316                                    else
317                                    it is the number of bytes from the item that are shifted into S_new
318                                  */
319         int n, i;
320         int ret_val;
321         int pos_in_item;
322         int zeros_num;
323
324         PROC_INFO_INC(tb->tb_sb, balance_at[0]);
325
326         /* Make balance in case insert_size[0] < 0 */
327         if (tb->insert_size[0] < 0)
328                 return balance_leaf_when_delete(tb, flag);
329
330         zeros_num = 0;
331         if (flag == M_INSERT && !body)
332                 zeros_num = ih_item_len(ih);
333
334         pos_in_item = tb->tb_path->pos_in_item;
335         /* for indirect item pos_in_item is measured in unformatted node
336            pointers. Recalculate to bytes */
337         if (flag != M_INSERT
338             && is_indirect_le_ih(item_head(tbS0, item_pos)))
339                 pos_in_item *= UNFM_P_SIZE;
340
341         if (tb->lnum[0] > 0) {
342                 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
343                 if (item_pos < tb->lnum[0]) {
344                         /* new item or it part falls to L[0], shift it too */
345                         n = B_NR_ITEMS(tb->L[0]);
346
347                         switch (flag) {
348                         case M_INSERT:  /* insert item into L[0] */
349
350                                 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
351                                         /* part of new item falls into L[0] */
352                                         int new_item_len;
353                                         int version;
354
355                                         ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
356
357                                         /* Calculate item length to insert to S[0] */
358                                         new_item_len = ih_item_len(ih) - tb->lbytes;
359                                         /* Calculate and check item length to insert to L[0] */
360                                         put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
361
362                                         RFALSE(ih_item_len(ih) <= 0,
363                                                "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
364                                                ih_item_len(ih));
365
366                                         /* Insert new item into L[0] */
367                                         buffer_info_init_left(tb, &bi);
368                                         leaf_insert_into_buf(&bi,
369                                                         n + item_pos - ret_val, ih, body,
370                                                         zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
371
372                                         version = ih_version(ih);
373
374                                         /* Calculate key component, item length and body to insert into S[0] */
375                                         set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
376                                                         (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
377
378                                         put_ih_item_len(ih, new_item_len);
379                                         if (tb->lbytes > zeros_num) {
380                                                 body += (tb->lbytes - zeros_num);
381                                                 zeros_num = 0;
382                                         } else
383                                                 zeros_num -= tb->lbytes;
384
385                                         RFALSE(ih_item_len(ih) <= 0,
386                                                "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
387                                                ih_item_len(ih));
388                                 } else {
389                                         /* new item in whole falls into L[0] */
390                                         /* Shift lnum[0]-1 items to L[0] */
391                                         ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
392                                         /* Insert new item into L[0] */
393                                         buffer_info_init_left(tb, &bi);
394                                         leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
395                                         tb->insert_size[0] = 0;
396                                         zeros_num = 0;
397                                 }
398                                 break;
399
400                         case M_PASTE:   /* append item in L[0] */
401
402                                 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
403                                         /* we must shift the part of the appended item */
404                                         if (is_direntry_le_ih(item_head(tbS0, item_pos))) {
405
406                                                 RFALSE(zeros_num,
407                                                        "PAP-12090: invalid parameter in case of a directory");
408                                                 /* directory item */
409                                                 if (tb->lbytes > pos_in_item) {
410                                                         /* new directory entry falls into L[0] */
411                                                         struct item_head *pasted;
412                                                         int l_pos_in_item = pos_in_item;
413
414                                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
415                                                         ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
416                                                         if (ret_val && !item_pos) {
417                                                                 pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
418                                                                 l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1);
419                                                         }
420
421                                                         /* Append given directory entry to directory item */
422                                                         buffer_info_init_left(tb, &bi);
423                                                         leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
424
425                                                         /* previous string prepared space for pasting new entry, following string pastes this entry */
426
427                                                         /* when we have merge directory item, pos_in_item has been changed too */
428
429                                                         /* paste new directory entry. 1 is entry number */
430                                                         leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
431                                                                            1, (struct reiserfs_de_head *) body,
432                                                                            body + DEH_SIZE, tb->insert_size[0]);
433                                                         tb->insert_size[0] = 0;
434                                                 } else {
435                                                         /* new directory item doesn't fall into L[0] */
436                                                         /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
437                                                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
438                                                 }
439                                                 /* Calculate new position to append in item body */
440                                                 pos_in_item -= tb->lbytes;
441                                         } else {
442                                                 /* regular object */
443                                                 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
444                                                 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)),
445                                                        "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
446                                                        ih_item_len(item_head(tbS0, item_pos)),pos_in_item);
447
448                                                 if (tb->lbytes >= pos_in_item) {
449                                                         /* appended item will be in L[0] in whole */
450                                                         int l_n;
451
452                                                         /* this bytes number must be appended to the last item of L[h] */
453                                                         l_n = tb->lbytes - pos_in_item;
454
455                                                         /* Calculate new insert_size[0] */
456                                                         tb->insert_size[0] -= l_n;
457
458                                                         RFALSE(tb->insert_size[0] <= 0,
459                                                                "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
460                                                                tb->insert_size[0]);
461                                                         ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
462                                                                             (item_head(tbS0, item_pos)));
463                                                         /* Append to body of item in L[0] */
464                                                         buffer_info_init_left(tb, &bi);
465                                                         leaf_paste_in_buffer
466                                                             (&bi, n + item_pos - ret_val, ih_item_len
467                                                              (item_head(tb->L[0], n + item_pos - ret_val)),
468                                                              l_n, body,
469                                                              zeros_num > l_n ? l_n : zeros_num);
470                                                         /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
471                                                         {
472                                                                 int version;
473                                                                 int temp_l = l_n;
474
475                                                                 RFALSE(ih_item_len(item_head(tbS0, 0)),
476                                                                      "PAP-12106: item length must be 0");
477                                                                 RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key
478                                                                       (tb->L[0], n + item_pos - ret_val)),
479                                                                      "PAP-12107: items must be of the same file");
480                                                                 if (is_indirect_le_ih(item_head(tb->L[0], n + item_pos - ret_val))) {
481                                                                         temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
482                                                                 }
483                                                                 /* update key of first item in S0 */
484                                                                 version = ih_version(item_head(tbS0, 0));
485                                                                 set_le_key_k_offset(version, leaf_key(tbS0, 0),
486                                                                      le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l);
487                                                                 /* update left delimiting key */
488                                                                 set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]),
489                                                                      le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l);
490                                                         }
491
492                                                         /* Calculate new body, position in item and insert_size[0] */
493                                                         if (l_n > zeros_num) {
494                                                                 body += (l_n - zeros_num);
495                                                                 zeros_num = 0;
496                                                         } else
497                                                                 zeros_num -= l_n;
498                                                         pos_in_item = 0;
499
500                                                         RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
501                                                              || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)
502                                                              || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
503                                                              "PAP-12120: item must be merge-able with left neighboring item");
504                                                 } else {        /* only part of the appended item will be in L[0] */
505
506                                                         /* Calculate position in item for append in S[0] */
507                                                         pos_in_item -= tb->lbytes;
508
509                                                         RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
510
511                                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
512                                                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
513                                                 }
514                                         }
515                                 } else {        /* appended item will be in L[0] in whole */
516
517                                         struct item_head *pasted;
518
519                                         if (!item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) {       /* if we paste into first item of S[0] and it is left mergable */
520                                                 /* then increment pos_in_item by the size of the last item in L[0] */
521                                                 pasted = item_head(tb->L[0], n - 1);
522                                                 if (is_direntry_le_ih(pasted))
523                                                         pos_in_item += ih_entry_count(pasted);
524                                                 else
525                                                         pos_in_item += ih_item_len(pasted);
526                                         }
527
528                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
529                                         ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
530                                         /* Append to body of item in L[0] */
531                                         buffer_info_init_left(tb, &bi);
532                                         leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
533                                                              pos_in_item,
534                                                              tb->insert_size[0],
535                                                              body, zeros_num);
536
537                                         /* if appended item is directory, paste entry */
538                                         pasted = item_head(tb->L[0], n + item_pos - ret_val);
539                                         if (is_direntry_le_ih(pasted))
540                                                 leaf_paste_entries(&bi, n + item_pos - ret_val,
541                                                                    pos_in_item, 1,
542                                                                    (struct reiserfs_de_head *) body,
543                                                                    body + DEH_SIZE,
544                                                                    tb->insert_size[0]);
545                                         /* if appended item is indirect item, put unformatted node into un list */
546                                         if (is_indirect_le_ih(pasted))
547                                                 set_ih_free_space(pasted, 0);
548                                         tb->insert_size[0] = 0;
549                                         zeros_num = 0;
550                                 }
551                                 break;
552                         default:        /* cases d and t */
553                                 reiserfs_panic(tb->tb_sb, "PAP-12130",
554                                                "lnum > 0: unexpected mode: "
555                                                " %s(%d)",
556                                                (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
557                         }
558                 } else {
559                         /* new item doesn't fall into L[0] */
560                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
561                 }
562         }
563
564         /* tb->lnum[0] > 0 */
565         /* Calculate new item position */
566         item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
567
568         if (tb->rnum[0] > 0) {
569                 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
570                 n = B_NR_ITEMS(tbS0);
571                 switch (flag) {
572
573                 case M_INSERT:  /* insert item */
574                         if (n - tb->rnum[0] < item_pos) {       /* new item or its part falls to R[0] */
575                                 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {      /* part of new item falls into R[0] */
576                                         loff_t old_key_comp, old_len, r_zeros_number;
577                                         const char *r_body;
578                                         int version;
579                                         loff_t offset;
580
581                                         leaf_shift_right(tb, tb->rnum[0] - 1, -1);
582
583                                         version = ih_version(ih);
584                                         /* Remember key component and item length */
585                                         old_key_comp = le_ih_k_offset(ih);
586                                         old_len = ih_item_len(ih);
587
588                                         /* Calculate key component and item length to insert into R[0] */
589                                         offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
590                                         set_le_ih_k_offset(ih, offset);
591                                         put_ih_item_len(ih, tb->rbytes);
592                                         /* Insert part of the item into R[0] */
593                                         buffer_info_init_right(tb, &bi);
594                                         if ((old_len - tb->rbytes) > zeros_num) {
595                                                 r_zeros_number = 0;
596                                                 r_body = body + (old_len - tb->rbytes) - zeros_num;
597                                         } else {
598                                                 r_body = body;
599                                                 r_zeros_number = zeros_num - (old_len - tb->rbytes);
600                                                 zeros_num -= r_zeros_number;
601                                         }
602
603                                         leaf_insert_into_buf(&bi, 0, ih, r_body,
604                                                              r_zeros_number);
605
606                                         /* Replace right delimiting key by first key in R[0] */
607                                         replace_key(tb, tb->CFR[0], tb->rkey[0],
608                                                     tb->R[0], 0);
609
610                                         /* Calculate key component and item length to insert into S[0] */
611                                         set_le_ih_k_offset(ih, old_key_comp);
612                                         put_ih_item_len(ih, old_len - tb->rbytes);
613
614                                         tb->insert_size[0] -= tb->rbytes;
615
616                                 } else {        /* whole new item falls into R[0] */
617
618                                         /* Shift rnum[0]-1 items to R[0] */
619                                         ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
620                                         /* Insert new item into R[0] */
621                                         buffer_info_init_right(tb, &bi);
622                                         leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
623                                                              ih, body, zeros_num);
624
625                                         if (item_pos - n + tb->rnum[0] - 1 == 0) {
626                                                 replace_key(tb, tb->CFR[0],
627                                                             tb->rkey[0],
628                                                             tb->R[0], 0);
629
630                                         }
631                                         zeros_num = tb->insert_size[0] = 0;
632                                 }
633                         } else {        /* new item or part of it doesn't fall into R[0] */
634
635                                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
636                         }
637                         break;
638
639                 case M_PASTE:   /* append item */
640
641                         if (n - tb->rnum[0] <= item_pos) {      /* pasted item or part of it falls to R[0] */
642                                 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {  /* we must shift the part of the appended item */
643                                         if (is_direntry_le_ih(item_head(tbS0, item_pos))) {     /* we append to directory item */
644                                                 int entry_count;
645
646                                                 RFALSE(zeros_num,
647                                                        "PAP-12145: invalid parameter in case of a directory");
648                                                 entry_count = ih_entry_count(item_head
649                                                                   (tbS0, item_pos));
650                                                 if (entry_count - tb->rbytes <
651                                                     pos_in_item)
652                                                         /* new directory entry falls into R[0] */
653                                                 {
654                                                         int paste_entry_position;
655
656                                                         RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
657                                                                "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
658                                                                tb->rbytes, entry_count);
659                                                         /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
660                                                         leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
661                                                         /* Paste given directory entry to directory item */
662                                                         paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
663                                                         buffer_info_init_right(tb, &bi);
664                                                         leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
665                                                         /* paste entry */
666                                                         leaf_paste_entries(&bi, 0, paste_entry_position, 1,
667                                                                            (struct reiserfs_de_head *) body,
668                                                                            body + DEH_SIZE, tb->insert_size[0]);
669
670                                                         if (paste_entry_position == 0) {
671                                                                 /* change delimiting keys */
672                                                                 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
673                                                         }
674
675                                                         tb->insert_size[0] = 0;
676                                                         pos_in_item++;
677                                                 } else {        /* new directory entry doesn't fall into R[0] */
678
679                                                         leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
680                                                 }
681                                         } else {        /* regular object */
682
683                                                 int n_shift, n_rem, r_zeros_number;
684                                                 const char *r_body;
685
686                                                 /* Calculate number of bytes which must be shifted from appended item */
687                                                 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
688                                                         n_shift = 0;
689
690                                                 RFALSE(pos_in_item != ih_item_len
691                                                        (item_head(tbS0, item_pos)),
692                                                        "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
693                                                        pos_in_item, ih_item_len
694                                                        (item_head(tbS0, item_pos)));
695
696                                                 leaf_shift_right(tb, tb->rnum[0], n_shift);
697                                                 /* Calculate number of bytes which must remain in body after appending to R[0] */
698                                                 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
699                                                         n_rem = 0;
700
701                                                 {
702                                                         int version;
703                                                         unsigned long temp_rem = n_rem;
704
705                                                         version = ih_version(item_head(tb->R[0], 0));
706                                                         if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
707                                                                 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
708                                                         }
709                                                         set_le_key_k_offset(version, leaf_key(tb->R[0], 0),
710                                                              le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem);
711                                                         set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
712                                                              le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem);
713                                                 }
714 /*                k_offset (leaf_key(tb->R[0],0)) += n_rem;
715                   k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
716                                                 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
717
718                                                 /* Append part of body into R[0] */
719                                                 buffer_info_init_right(tb, &bi);
720                                                 if (n_rem > zeros_num) {
721                                                         r_zeros_number = 0;
722                                                         r_body = body + n_rem - zeros_num;
723                                                 } else {
724                                                         r_body = body;
725                                                         r_zeros_number = zeros_num - n_rem;
726                                                         zeros_num -= r_zeros_number;
727                                                 }
728
729                                                 leaf_paste_in_buffer(&bi, 0, n_shift,
730                                                                      tb->insert_size[0] - n_rem,
731                                                                      r_body, r_zeros_number);
732
733                                                 if (is_indirect_le_ih(item_head(tb->R[0], 0))) {
734 #if 0
735                                                         RFALSE(n_rem,
736                                                                "PAP-12160: paste more than one unformatted node pointer");
737 #endif
738                                                         set_ih_free_space(item_head(tb->R[0], 0), 0);
739                                                 }
740                                                 tb->insert_size[0] = n_rem;
741                                                 if (!n_rem)
742                                                         pos_in_item++;
743                                         }
744                                 } else {        /* pasted item in whole falls into R[0] */
745
746                                         struct item_head *pasted;
747
748                                         ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
749                                         /* append item in R[0] */
750                                         if (pos_in_item >= 0) {
751                                                 buffer_info_init_right(tb, &bi);
752                                                 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
753                                                                      tb->insert_size[0], body, zeros_num);
754                                         }
755
756                                         /* paste new entry, if item is directory item */
757                                         pasted = item_head(tb->R[0], item_pos - n + tb->rnum[0]);
758                                         if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
759                                                 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
760                                                                    pos_in_item, 1,
761                                                                    (struct reiserfs_de_head *) body,
762                                                                    body + DEH_SIZE, tb->insert_size[0]);
763                                                 if (!pos_in_item) {
764
765                                                         RFALSE(item_pos - n + tb->rnum[0],
766                                                                "PAP-12165: directory item must be first item of node when pasting is in 0th position");
767
768                                                         /* update delimiting keys */
769                                                         replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
770                                                 }
771                                         }
772
773                                         if (is_indirect_le_ih(pasted))
774                                                 set_ih_free_space(pasted, 0);
775                                         zeros_num = tb->insert_size[0] = 0;
776                                 }
777                         } else {        /* new item doesn't fall into R[0] */
778
779                                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
780                         }
781                         break;
782                 default:        /* cases d and t */
783                         reiserfs_panic(tb->tb_sb, "PAP-12175",
784                                        "rnum > 0: unexpected mode: %s(%d)",
785                                        (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
786                 }
787
788         }
789
790         /* tb->rnum[0] > 0 */
791         RFALSE(tb->blknum[0] > 3,
792                "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
793         RFALSE(tb->blknum[0] < 0,
794                "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
795
796         /* if while adding to a node we discover that it is possible to split
797            it in two, and merge the left part into the left neighbor and the
798            right part into the right neighbor, eliminating the node */
799         if (tb->blknum[0] == 0) {       /* node S[0] is empty now */
800
801                 RFALSE(!tb->lnum[0] || !tb->rnum[0],
802                        "PAP-12190: lnum and rnum must not be zero");
803                 /* if insertion was done before 0-th position in R[0], right
804                    delimiting key of the tb->L[0]'s and left delimiting key are
805                    not set correctly */
806                 if (tb->CFL[0]) {
807                         if (!tb->CFR[0])
808                                 reiserfs_panic(tb->tb_sb, "vs-12195",
809                                                "CFR not initialized");
810                         copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
811                                  internal_key(tb->CFR[0], tb->rkey[0]));
812                         do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
813                 }
814
815                 reiserfs_invalidate_buffer(tb, tbS0);
816                 return 0;
817         }
818
819         /* Fill new nodes that appear in place of S[0] */
820
821         /* I am told that this copying is because we need an array to enable
822            the looping code. -Hans */
823         snum[0] = tb->s1num, snum[1] = tb->s2num;
824         sbytes[0] = tb->s1bytes;
825         sbytes[1] = tb->s2bytes;
826         for (i = tb->blknum[0] - 2; i >= 0; i--) {
827
828                 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
829                        snum[i]);
830
831                 /* here we shift from S to S_new nodes */
832
833                 S_new[i] = get_FEB(tb);
834
835                 /* initialized block type and tree level */
836                 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
837
838                 n = B_NR_ITEMS(tbS0);
839
840                 switch (flag) {
841                 case M_INSERT:  /* insert item */
842
843                         if (n - snum[i] < item_pos) {   /* new item or it's part falls to first new node S_new[i] */
844                                 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {   /* part of new item falls into S_new[i] */
845                                         int old_key_comp, old_len, r_zeros_number;
846                                         const char *r_body;
847                                         int version;
848
849                                         /* Move snum[i]-1 items from S[0] to S_new[i] */
850                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
851                                                         snum[i] - 1, -1,
852                                                         S_new[i]);
853                                         /* Remember key component and item length */
854                                         version = ih_version(ih);
855                                         old_key_comp = le_ih_k_offset(ih);
856                                         old_len = ih_item_len(ih);
857
858                                         /* Calculate key component and item length to insert into S_new[i] */
859                                         set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
860                                                            ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
861
862                                         put_ih_item_len(ih, sbytes[i]);
863
864                                         /* Insert part of the item into S_new[i] before 0-th item */
865                                         buffer_info_init_bh(tb, &bi, S_new[i]);
866
867                                         if ((old_len - sbytes[i]) > zeros_num) {
868                                                 r_zeros_number = 0;
869                                                 r_body = body + (old_len - sbytes[i]) - zeros_num;
870                                         } else {
871                                                 r_body = body;
872                                                 r_zeros_number = zeros_num - (old_len - sbytes[i]);
873                                                 zeros_num -= r_zeros_number;
874                                         }
875
876                                         leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
877
878                                         /* Calculate key component and item length to insert into S[i] */
879                                         set_le_ih_k_offset(ih, old_key_comp);
880                                         put_ih_item_len(ih, old_len - sbytes[i]);
881                                         tb->insert_size[0] -= sbytes[i];
882                                 } else {        /* whole new item falls into S_new[i] */
883
884                                         /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
885                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
886                                                         snum[i] - 1, sbytes[i], S_new[i]);
887
888                                         /* Insert new item into S_new[i] */
889                                         buffer_info_init_bh(tb, &bi, S_new[i]);
890                                         leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
891                                                              ih, body, zeros_num);
892
893                                         zeros_num = tb->insert_size[0] = 0;
894                                 }
895                         }
896
897                         else {  /* new item or it part don't falls into S_new[i] */
898
899                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
900                                                 snum[i], sbytes[i], S_new[i]);
901                         }
902                         break;
903
904                 case M_PASTE:   /* append item */
905
906                         if (n - snum[i] <= item_pos) {  /* pasted item or part if it falls to S_new[i] */
907                                 if (item_pos == n - snum[i] && sbytes[i] != -1) {       /* we must shift part of the appended item */
908                                         struct item_head *aux_ih;
909
910                                         RFALSE(ih, "PAP-12210: ih must be 0");
911
912                                         aux_ih = item_head(tbS0, item_pos);
913                                         if (is_direntry_le_ih(aux_ih)) {
914                                                 /* we append to directory item */
915
916                                                 int entry_count;
917
918                                                 entry_count = ih_entry_count(aux_ih);
919
920                                                 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
921                                                         /* new directory entry falls into S_new[i] */
922
923                                                         RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
924                                                         RFALSE(sbytes[i] - 1 >= entry_count,
925                                                                "PAP-12220: there are no so much entries (%d), only %d",
926                                                                sbytes[i] - 1, entry_count);
927
928                                                         /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
929                                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
930                                                         /* Paste given directory entry to directory item */
931                                                         buffer_info_init_bh(tb, &bi, S_new[i]);
932                                                         leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933                                                              tb->insert_size[0], body, zeros_num);
934                                                         /* paste new directory entry */
935                                                         leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
936                                                                            (struct reiserfs_de_head *) body,
937                                                                            body + DEH_SIZE, tb->insert_size[0]);
938                                                         tb->insert_size[0] = 0;
939                                                         pos_in_item++;
940                                                 } else {        /* new directory entry doesn't fall into S_new[i] */
941                                                         leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
942                                                 }
943                                         } else {        /* regular object */
944
945                                                 int n_shift, n_rem, r_zeros_number;
946                                                 const char *r_body;
947
948                                                 RFALSE(pos_in_item != ih_item_len(item_head(tbS0, item_pos)) || tb->insert_size[0] <= 0,
949                                                        "PAP-12225: item too short or insert_size <= 0");
950
951                                                 /* Calculate number of bytes which must be shifted from appended item */
952                                                 n_shift = sbytes[i] - tb->insert_size[0];
953                                                 if (n_shift < 0)
954                                                         n_shift = 0;
955                                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
956
957                                                 /* Calculate number of bytes which must remain in body after append to S_new[i] */
958                                                 n_rem = tb->insert_size[0] - sbytes[i];
959                                                 if (n_rem < 0)
960                                                         n_rem = 0;
961                                                 /* Append part of body into S_new[0] */
962                                                 buffer_info_init_bh(tb, &bi, S_new[i]);
963                                                 if (n_rem > zeros_num) {
964                                                         r_zeros_number = 0;
965                                                         r_body = body + n_rem - zeros_num;
966                                                 } else {
967                                                         r_body = body;
968                                                         r_zeros_number = zeros_num - n_rem;
969                                                         zeros_num -= r_zeros_number;
970                                                 }
971
972                                                 leaf_paste_in_buffer(&bi, 0, n_shift,
973                                                                      tb->insert_size[0] - n_rem,
974                                                                      r_body, r_zeros_number);
975                                                 {
976                                                         struct item_head *tmp;
977
978                                                         tmp = item_head(S_new[i], 0);
979                                                         if (is_indirect_le_ih
980                                                             (tmp)) {
981                                                                 set_ih_free_space(tmp, 0);
982                                                                 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
983                                                         } else {
984                                                                 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
985                                                         }
986                                                 }
987
988                                                 tb->insert_size[0] = n_rem;
989                                                 if (!n_rem)
990                                                         pos_in_item++;
991                                         }
992                                 } else
993                                         /* item falls wholly into S_new[i] */
994                                 {
995                                         int leaf_mi;
996                                         struct item_head *pasted;
997
998 #ifdef CONFIG_REISERFS_CHECK
999                                         struct item_head *ih_check = item_head(tbS0, item_pos);
1000
1001                                         if (!is_direntry_le_ih(ih_check)
1002                                             && (pos_in_item != ih_item_len(ih_check)
1003                                                 || tb->insert_size[0] <= 0))
1004                                                 reiserfs_panic(tb->tb_sb,
1005                                                              "PAP-12235",
1006                                                              "pos_in_item "
1007                                                              "must be equal "
1008                                                              "to ih_item_len");
1009 #endif                          /* CONFIG_REISERFS_CHECK */
1010
1011                                         leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
1012                                                             tb, snum[i],
1013                                                             sbytes[i],
1014                                                             S_new[i]);
1015
1016                                         RFALSE(leaf_mi,
1017                                                "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1018                                                leaf_mi);
1019
1020                                         /* paste into item */
1021                                         buffer_info_init_bh(tb, &bi, S_new[i]);
1022                                         leaf_paste_in_buffer(&bi,
1023                                                              item_pos - n + snum[i],
1024                                                              pos_in_item,
1025                                                              tb->insert_size[0],
1026                                                              body, zeros_num);
1027
1028                                         pasted = item_head(S_new[i], item_pos - n + snum[i]);
1029                                         if (is_direntry_le_ih(pasted)) {
1030                                                 leaf_paste_entries(&bi,
1031                                                                    item_pos - n + snum[i],
1032                                                                    pos_in_item, 1,
1033                                                                    (struct reiserfs_de_head *)body,
1034                                                                    body + DEH_SIZE,
1035                                                                    tb->insert_size[0]
1036                                                     );
1037                                         }
1038
1039                                         /* if we paste to indirect item update ih_free_space */
1040                                         if (is_indirect_le_ih(pasted))
1041                                                 set_ih_free_space(pasted, 0);
1042                                         zeros_num = tb->insert_size[0] = 0;
1043                                 }
1044                         }
1045
1046                         else {  /* pasted item doesn't fall into S_new[i] */
1047
1048                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1049                                                 snum[i], sbytes[i], S_new[i]);
1050                         }
1051                         break;
1052                 default:        /* cases d and t */
1053                         reiserfs_panic(tb->tb_sb, "PAP-12245",
1054                                        "blknum > 2: unexpected mode: %s(%d)",
1055                                        (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1056                 }
1057
1058                 memcpy(insert_key + i, leaf_key(S_new[i], 0), KEY_SIZE);
1059                 insert_ptr[i] = S_new[i];
1060
1061                 RFALSE(!buffer_journaled(S_new[i])
1062                        || buffer_journal_dirty(S_new[i])
1063                        || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1064                        i, S_new[i]);
1065         }
1066
1067         /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1068            affected item which remains in S */
1069         if (0 <= item_pos && item_pos < tb->s0num) {    /* if we must insert or append into buffer S[0] */
1070
1071                 switch (flag) {
1072                 case M_INSERT:  /* insert item into S[0] */
1073                         buffer_info_init_tbS0(tb, &bi);
1074                         leaf_insert_into_buf(&bi, item_pos, ih, body,
1075                                              zeros_num);
1076
1077                         /* If we insert the first key change the delimiting key */
1078                         if (item_pos == 0) {
1079                                 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1080                                         replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1081                         }
1082                         break;
1083
1084                 case M_PASTE:{  /* append item in S[0] */
1085                                 struct item_head *pasted;
1086
1087                                 pasted = item_head(tbS0, item_pos);
1088                                 /* when directory, may be new entry already pasted */
1089                                 if (is_direntry_le_ih(pasted)) {
1090                                         if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1091
1092                                                 RFALSE(!tb->insert_size[0],
1093                                                        "PAP-12260: insert_size is 0 already");
1094
1095                                                 /* prepare space */
1096                                                 buffer_info_init_tbS0(tb, &bi);
1097                                                 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1098                                                                      tb->insert_size[0], body,
1099                                                                      zeros_num);
1100
1101                                                 /* paste entry */
1102                                                 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1103                                                                    (struct reiserfs_de_head *)body,
1104                                                                    body + DEH_SIZE,
1105                                                                    tb->insert_size[0]);
1106                                                 if (!item_pos && !pos_in_item) {
1107                                                         RFALSE(!tb->CFL[0] || !tb->L[0],
1108                                                                "PAP-12270: CFL[0]/L[0] must be specified");
1109                                                         if (tb->CFL[0])
1110                                                                 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1111                                                 }
1112                                                 tb->insert_size[0] = 0;
1113                                         }
1114                                 } else {        /* regular object */
1115                                         if (pos_in_item == ih_item_len(pasted)) {
1116
1117                                                 RFALSE(tb->insert_size[0] <= 0,
1118                                                        "PAP-12275: insert size must not be %d",
1119                                                        tb->insert_size[0]);
1120                                                 buffer_info_init_tbS0(tb, &bi);
1121                                                 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1122                                                                      tb->insert_size[0], body, zeros_num);
1123
1124                                                 if (is_indirect_le_ih(pasted)) {
1125 #if 0
1126                                                         RFALSE(tb->
1127                                                                insert_size[0] !=
1128                                                                UNFM_P_SIZE,
1129                                                                "PAP-12280: insert_size for indirect item must be %d, not %d",
1130                                                                UNFM_P_SIZE,
1131                                                                tb->
1132                                                                insert_size[0]);
1133 #endif
1134                                                         set_ih_free_space(pasted, 0);
1135                                                 }
1136                                                 tb->insert_size[0] = 0;
1137                                         }
1138 #ifdef CONFIG_REISERFS_CHECK
1139                                         else {
1140                                                 if (tb->insert_size[0]) {
1141                                                         print_cur_tb("12285");
1142                                                         reiserfs_panic(tb->tb_sb,
1143                                                             "PAP-12285",
1144                                                             "insert_size "
1145                                                             "must be 0 "
1146                                                             "(%d)",
1147                                                             tb->insert_size[0]);
1148                                                 }
1149                                         }
1150 #endif                          /* CONFIG_REISERFS_CHECK */
1151
1152                                 }
1153                         }       /* case M_PASTE: */
1154                 }
1155         }
1156 #ifdef CONFIG_REISERFS_CHECK
1157         if (flag == M_PASTE && tb->insert_size[0]) {
1158                 print_cur_tb("12290");
1159                 reiserfs_panic(tb->tb_sb,
1160                                "PAP-12290", "insert_size is still not 0 (%d)",
1161                                tb->insert_size[0]);
1162         }
1163 #endif                          /* CONFIG_REISERFS_CHECK */
1164         return 0;
1165 }                               /* Leaf level of the tree is balanced (end of balance_leaf) */
1166
1167 /* Make empty node */
1168 void make_empty_node(struct buffer_info *bi)
1169 {
1170         struct block_head *blkh;
1171
1172         RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1173
1174         blkh = B_BLK_HEAD(bi->bi_bh);
1175         set_blkh_nr_item(blkh, 0);
1176         set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1177
1178         if (bi->bi_parent)
1179                 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1180 }
1181
1182 /* Get first empty buffer */
1183 struct buffer_head *get_FEB(struct tree_balance *tb)
1184 {
1185         int i;
1186         struct buffer_info bi;
1187
1188         for (i = 0; i < MAX_FEB_SIZE; i++)
1189                 if (tb->FEB[i] != NULL)
1190                         break;
1191
1192         if (i == MAX_FEB_SIZE)
1193                 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1194
1195         buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1196         make_empty_node(&bi);
1197         set_buffer_uptodate(tb->FEB[i]);
1198         tb->used[i] = tb->FEB[i];
1199         tb->FEB[i] = NULL;
1200
1201         return tb->used[i];
1202 }
1203
1204 /* This is now used because reiserfs_free_block has to be able to schedule. */
1205 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1206 {
1207         int i;
1208
1209         if (buffer_dirty(bh))
1210                 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1211                                  "called with dirty buffer");
1212         for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1213                 if (!tb->thrown[i]) {
1214                         tb->thrown[i] = bh;
1215                         get_bh(bh);     /* free_thrown puts this */
1216                         return;
1217                 }
1218         reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1219                          "too many thrown buffers");
1220 }
1221
1222 static void free_thrown(struct tree_balance *tb)
1223 {
1224         int i;
1225         b_blocknr_t blocknr;
1226         for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1227                 if (tb->thrown[i]) {
1228                         blocknr = tb->thrown[i]->b_blocknr;
1229                         if (buffer_dirty(tb->thrown[i]))
1230                                 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1231                                                  "called with dirty buffer %d",
1232                                                  blocknr);
1233                         brelse(tb->thrown[i]);  /* incremented in store_thrown */
1234                         reiserfs_free_block(tb->transaction_handle, NULL,
1235                                             blocknr, 0);
1236                 }
1237         }
1238 }
1239
1240 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1241 {
1242         struct block_head *blkh;
1243         blkh = B_BLK_HEAD(bh);
1244         set_blkh_level(blkh, FREE_LEVEL);
1245         set_blkh_nr_item(blkh, 0);
1246
1247         clear_buffer_dirty(bh);
1248         store_thrown(tb, bh);
1249 }
1250
1251 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1252 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1253                  struct buffer_head *src, int n_src)
1254 {
1255
1256         RFALSE(dest == NULL || src == NULL,
1257                "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1258                src, dest);
1259         RFALSE(!B_IS_KEYS_LEVEL(dest),
1260                "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1261                dest);
1262         RFALSE(n_dest < 0 || n_src < 0,
1263                "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1264         RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1265                "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1266                n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1267
1268         if (B_IS_ITEMS_LEVEL(src))
1269                 /* source buffer contains leaf node */
1270                 memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1271                        KEY_SIZE);
1272         else
1273                 memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1274                        KEY_SIZE);
1275
1276         do_balance_mark_internal_dirty(tb, dest, 0);
1277 }
1278
1279 int get_left_neighbor_position(struct tree_balance *tb, int h)
1280 {
1281         int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1282
1283         RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1284                "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1285                h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1286
1287         if (Sh_position == 0)
1288                 return B_NR_ITEMS(tb->FL[h]);
1289         else
1290                 return Sh_position - 1;
1291 }
1292
1293 int get_right_neighbor_position(struct tree_balance *tb, int h)
1294 {
1295         int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1296
1297         RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1298                "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1299                h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1300
1301         if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1302                 return 0;
1303         else
1304                 return Sh_position + 1;
1305 }
1306
1307 #ifdef CONFIG_REISERFS_CHECK
1308
1309 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1310 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1311                                 char *mes)
1312 {
1313         struct disk_child *dc;
1314         int i;
1315
1316         RFALSE(!bh, "PAP-12336: bh == 0");
1317
1318         if (!bh || !B_IS_IN_TREE(bh))
1319                 return;
1320
1321         RFALSE(!buffer_dirty(bh) &&
1322                !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1323                "PAP-12337: buffer (%b) must be dirty", bh);
1324         dc = B_N_CHILD(bh, 0);
1325
1326         for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1327                 if (!is_reusable(s, dc_block_number(dc), 1)) {
1328                         print_cur_tb(mes);
1329                         reiserfs_panic(s, "PAP-12338",
1330                                        "invalid child pointer %y in %b",
1331                                        dc, bh);
1332                 }
1333         }
1334 }
1335
1336 static int locked_or_not_in_tree(struct tree_balance *tb,
1337                                   struct buffer_head *bh, char *which)
1338 {
1339         if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1340             !B_IS_IN_TREE(bh)) {
1341                 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1342                 return 1;
1343         }
1344         return 0;
1345 }
1346
1347 static int check_before_balancing(struct tree_balance *tb)
1348 {
1349         int retval = 0;
1350
1351         if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1352                 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1353                                "occurred based on cur_tb not being null at "
1354                                "this point in code. do_balance cannot properly "
1355                                "handle concurrent tree accesses on a same "
1356                                "mount point.");
1357         }
1358
1359         /*
1360          * double check that buffers that we will modify are unlocked.
1361          * (fix_nodes should already have prepped all of these for us).
1362          */
1363         if (tb->lnum[0]) {
1364                 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1365                 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1366                 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1367                 check_leaf(tb->L[0]);
1368         }
1369         if (tb->rnum[0]) {
1370                 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1371                 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1372                 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1373                 check_leaf(tb->R[0]);
1374         }
1375         retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1376                                         "S[0]");
1377         check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1378
1379         return retval;
1380 }
1381
1382 static void check_after_balance_leaf(struct tree_balance *tb)
1383 {
1384         if (tb->lnum[0]) {
1385                 if (B_FREE_SPACE(tb->L[0]) !=
1386                     MAX_CHILD_SIZE(tb->L[0]) -
1387                     dc_size(B_N_CHILD
1388                             (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1389                         print_cur_tb("12221");
1390                         reiserfs_panic(tb->tb_sb, "PAP-12355",
1391                                        "shift to left was incorrect");
1392                 }
1393         }
1394         if (tb->rnum[0]) {
1395                 if (B_FREE_SPACE(tb->R[0]) !=
1396                     MAX_CHILD_SIZE(tb->R[0]) -
1397                     dc_size(B_N_CHILD
1398                             (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1399                         print_cur_tb("12222");
1400                         reiserfs_panic(tb->tb_sb, "PAP-12360",
1401                                        "shift to right was incorrect");
1402                 }
1403         }
1404         if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1405             (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1406              (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1407               dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1408                                 PATH_H_POSITION(tb->tb_path, 1)))))) {
1409                 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1410                 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1411                              dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1412                                                PATH_H_POSITION(tb->tb_path,
1413                                                                1))));
1414                 print_cur_tb("12223");
1415                 reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1416                                  "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1417                                  "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1418                                  left,
1419                                  MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1420                                  PATH_H_PBUFFER(tb->tb_path, 1),
1421                                  PATH_H_POSITION(tb->tb_path, 1),
1422                                  dc_size(B_N_CHILD
1423                                          (PATH_H_PBUFFER(tb->tb_path, 1),
1424                                           PATH_H_POSITION(tb->tb_path, 1))),
1425                                  right);
1426                 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1427         }
1428 }
1429
1430 static void check_leaf_level(struct tree_balance *tb)
1431 {
1432         check_leaf(tb->L[0]);
1433         check_leaf(tb->R[0]);
1434         check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1435 }
1436
1437 static void check_internal_levels(struct tree_balance *tb)
1438 {
1439         int h;
1440
1441         /* check all internal nodes */
1442         for (h = 1; tb->insert_size[h]; h++) {
1443                 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1444                                     "BAD BUFFER ON PATH");
1445                 if (tb->lnum[h])
1446                         check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1447                 if (tb->rnum[h])
1448                         check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1449         }
1450
1451 }
1452
1453 #endif
1454
1455 /*
1456  * Now we have all of the buffers that must be used in balancing of
1457  * the tree.  We rely on the assumption that schedule() will not occur
1458  * while do_balance works. ( Only interrupt handlers are acceptable.)
1459  * We balance the tree according to the analysis made before this,
1460  * using buffers already obtained.  For SMP support it will someday be
1461  * necessary to add ordered locking of tb.
1462  */
1463
1464 /*
1465  * Some interesting rules of balancing:
1466  * we delete a maximum of two nodes per level per balancing: we never
1467  * delete R, when we delete two of three nodes L, S, R then we move
1468  * them into R.
1469  *
1470  * we only delete L if we are deleting two nodes, if we delete only
1471  * one node we delete S
1472  *
1473  * if we shift leaves then we shift as much as we can: this is a
1474  * deliberate policy of extremism in node packing which results in
1475  * higher average utilization after repeated random balance operations
1476  * at the cost of more memory copies and more balancing as a result of
1477  * small insertions to full nodes.
1478  *
1479  * if we shift internal nodes we try to evenly balance the node
1480  * utilization, with consequent less balancing at the cost of lower
1481  * utilization.
1482  *
1483  * one could argue that the policy for directories in leaves should be
1484  * that of internal nodes, but we will wait until another day to
1485  * evaluate this....  It would be nice to someday measure and prove
1486  * these assumptions as to what is optimal....
1487  */
1488
1489 static inline void do_balance_starts(struct tree_balance *tb)
1490 {
1491         /* use print_cur_tb() to see initial state of struct tree_balance */
1492
1493         /* store_print_tb (tb); */
1494
1495         /* do not delete, just comment it out */
1496         /*
1497         print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
1498                  tb->tb_path->pos_in_item, tb, "check");
1499         */
1500         RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1501 #ifdef CONFIG_REISERFS_CHECK
1502         REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1503 #endif
1504 }
1505
1506 static inline void do_balance_completed(struct tree_balance *tb)
1507 {
1508
1509 #ifdef CONFIG_REISERFS_CHECK
1510         check_leaf_level(tb);
1511         check_internal_levels(tb);
1512         REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1513 #endif
1514
1515         /*
1516          * reiserfs_free_block is no longer schedule safe.  So, we need to
1517          * put the buffers we want freed on the thrown list during do_balance,
1518          * and then free them now
1519          */
1520
1521         REISERFS_SB(tb->tb_sb)->s_do_balance++;
1522
1523         /* release all nodes hold to perform the balancing */
1524         unfix_nodes(tb);
1525
1526         free_thrown(tb);
1527 }
1528
1529 /*
1530  * do_balance - balance the tree
1531  *
1532  * @tb: tree_balance structure
1533  * @ih: item header of inserted item
1534  * @body: body of inserted item or bytes to paste
1535  * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1536  *
1537  * Cut means delete part of an item (includes removing an entry from a
1538  * directory).
1539  *
1540  * Delete means delete whole item.
1541  *
1542  * Insert means add a new item into the tree.
1543  *
1544  * Paste means to append to the end of an existing file or to
1545  * insert a directory entry.
1546  */
1547 void do_balance(struct tree_balance *tb, struct item_head *ih,
1548                 const char *body, int flag)
1549 {
1550         int child_pos;          /* position of a child node in its parent */
1551         int h;                  /* level of the tree being processed */
1552
1553         /*
1554          * in our processing of one level we sometimes determine what
1555          * must be inserted into the next higher level.  This insertion
1556          * consists of a key or two keys and their corresponding
1557          * pointers
1558          */
1559         struct item_head insert_key[2];
1560
1561         /* inserted node-ptrs for the next level */
1562         struct buffer_head *insert_ptr[2];
1563
1564         tb->tb_mode = flag;
1565         tb->need_balance_dirty = 0;
1566
1567         if (FILESYSTEM_CHANGED_TB(tb)) {
1568                 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1569                                "changed");
1570         }
1571         /* if we have no real work to do  */
1572         if (!tb->insert_size[0]) {
1573                 reiserfs_warning(tb->tb_sb, "PAP-12350",
1574                                  "insert_size == 0, mode == %c", flag);
1575                 unfix_nodes(tb);
1576                 return;
1577         }
1578
1579         atomic_inc(&(fs_generation(tb->tb_sb)));
1580         do_balance_starts(tb);
1581
1582         /*
1583          * balance_leaf returns 0 except if combining L R and S into
1584          * one node.  see balance_internal() for explanation of this
1585          * line of code.
1586          */
1587         child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1588             balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1589
1590 #ifdef CONFIG_REISERFS_CHECK
1591         check_after_balance_leaf(tb);
1592 #endif
1593
1594         /* Balance internal level of the tree. */
1595         for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1596                 child_pos =
1597                     balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1598
1599         do_balance_completed(tb);
1600
1601 }