Merge tag 'asm-generic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 /*
 * This module can be used to catch cases where the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power
 * loss or kernel panic would cause the data on disk to be lost
 * or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and no write error was indicated and a
41  *        FLUSH request to the device where these blocks are
42  *        located was received and completed.
43  *    2b. All referenced blocks need to have a generation
44  *        number which is equal to the parent's number.
45  *
46  * One issue that was found using this module was that the log
47  * tree on disk became temporarily corrupted because disk blocks
48  * that had been in use for the log tree had been freed and
49  * reused too early, while being referenced by the written super
50  * block.
51  *
52  * The search term in the kernel log that can be used to filter
53  * on the existence of detected integrity issues is
54  * "btrfs: attempt".
55  *
56  * The integrity check is enabled via mount options. These
57  * mount options are only supported if the integrity check
58  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59  *
60  * Example #1, apply integrity checks to all metadata:
61  * mount /dev/sdb1 /mnt -o check_int
62  *
63  * Example #2, apply integrity checks to all metadata and
64  * to data extents:
65  * mount /dev/sdb1 /mnt -o check_int_data
66  *
67  * Example #3, apply integrity checks to all metadata and dump
68  * the tree that the super block references to kernel messages
69  * each time after a super block was written:
70  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71  *
72  * If the integrity check tool is included and activated in
73  * the mount options, plenty of kernel memory is used, and
74  * plenty of additional CPU cycles are spent. Enabling this
75  * functionality is not intended for normal use. In most
76  * cases, unless you are a btrfs developer who needs to verify
77  * the integrity of (super)-block write requests, do not
78  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79  * include and compile the integrity check tool.
80  *
81  * Expect millions of lines of information in the kernel log with an
82  * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
83  * kernel config to at least 26 (which is 64MB). Usually the value is
84  * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
85  * changed like this before LOG_BUF_SHIFT can be set to a high value:
86  * config LOG_BUF_SHIFT
87  *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
88  *       range 12 30
89  */
90
91 #include <linux/sched.h>
92 #include <linux/slab.h>
93 #include <linux/buffer_head.h>
94 #include <linux/mutex.h>
95 #include <linux/genhd.h>
96 #include <linux/blkdev.h>
97 #include <linux/vmalloc.h>
98 #include "ctree.h"
99 #include "disk-io.h"
100 #include "hash.h"
101 #include "transaction.h"
102 #include "extent_io.h"
103 #include "volumes.h"
104 #include "print-tree.h"
105 #include "locking.h"
106 #include "check-integrity.h"
107 #include "rcu-string.h"
108
/*
 * Number of buckets of the three hashtables used in this file.
 * Each value has to be a power of two: the hash functions reduce
 * their result with "& (SIZE - 1)".
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
/*
 * Magic numbers written into the magic_num field by the *_init()
 * functions and verified with BUG_ON() by the matching *_free()
 * functions.
 */
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
/*
 * NOTE(review): decimal, unlike the hexadecimal magic numbers above;
 * presumably stored in btrfsic_stack_frame.magic -- confirm.
 */
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
                                                         * excluding " [...]" */
/* initial value of btrfsic_block.generation, see btrfsic_block_init() */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
119
/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask
 * (the example value 263 used in the comment at the top of this file
 * is 0x107, i.e. four of the bits below OR-ed together).
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE                     0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION         0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE                  0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE                 0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH                        0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH                        0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE                              0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE                         0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE                         0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES                    0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                     0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES                           0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS                0x00001000
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE                0x00002000
138
/* forward declarations, both structures are defined further down */
struct btrfsic_dev_state;
struct btrfsic_state;

/*
 * One disk block known to the integrity checker. Blocks are kept in a
 * hashtable (chained through collision_resolving_node) where they are
 * found by the pair (dev_state->bdev, dev_bytenr), and they carry two
 * lists of btrfsic_block_link items describing which blocks they
 * reference and which blocks reference them.
 */
struct btrfsic_block {
        u32 magic_num;          /* only used for debug purposes */
        unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
        unsigned int is_superblock:1;   /* if it is one of the superblocks */
        unsigned int is_iodone:1;       /* if is done by lower subsystem */
        unsigned int iodone_w_error:1;  /* error was indicated to endio */
        unsigned int never_written:1;   /* block was added because it was
                                         * referenced, not because it was
                                         * written */
        unsigned int mirror_num;        /* large enough to hold
                                         * BTRFS_SUPER_MIRROR_MAX */
        struct btrfsic_dev_state *dev_state;
        u64 dev_bytenr;         /* key, physical byte num on disk */
        u64 logical_bytenr;     /* logical byte num on disk */
        u64 generation;         /* BTRFSIC_GENERATION_UNKNOWN after init */
        struct btrfs_disk_key disk_key; /* extra info to print in case of
                                         * issues, will not always be correct */
        struct list_head collision_resolving_node;      /* list node */
        struct list_head all_blocks_node;       /* list node */

        /* the following two lists contain block_link items */
        struct list_head ref_to_list;   /* list */
        struct list_head ref_from_list; /* list */
        struct btrfsic_block *next_in_same_bio;
        /*
         * NOTE(review): presumably the private pointer and the end_io
         * callback of the intercepted bio/buffer_head, saved while the
         * checker's own callback is installed -- confirm against the
         * submit/end_io code.
         */
        void *orig_bio_bh_private;
        union {
                bio_end_io_t *bio;
                bh_end_io_t *bh;
        } orig_bio_bh_end_io;
        int submit_bio_bh_rw;
        u64 flush_gen; /* only valid if !never_written */
};
174
/*
 * Elements of this type are allocated dynamically and are required
 * because each block object can refer to, and can be referred to from,
 * multiple blocks.
 * The key used to look them up in the hashtable is the device and
 * dev_bytenr of the block that is referred to plus the device and
 * dev_bytenr of the block that is referring to it.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
        u32 magic_num;          /* only used for debug purposes */
        u32 ref_cnt;            /* set to 1 by btrfsic_block_link_init() */
        struct list_head node_ref_to;   /* list node */
        struct list_head node_ref_from; /* list node */
        struct list_head collision_resolving_node;      /* list node */
        struct btrfsic_block *block_ref_to;     /* the referenced block */
        struct btrfsic_block *block_ref_from;   /* the referencing block */
        u64 parent_generation;
};
196
/*
 * State kept per block device. Objects are chained into the
 * dev_state hashtable through collision_resolving_node and are
 * looked up by their bdev pointer.
 */
struct btrfsic_dev_state {
        u32 magic_num;          /* only used for debug purposes */
        struct block_device *bdev;      /* hashtable key */
        struct btrfsic_state *state;
        struct list_head collision_resolving_node;      /* list node */
        /*
         * Embedded block object; btrfsic_dev_state_init() marks it as
         * is_iodone and lets its dev_state point back to this object.
         */
        struct btrfsic_block dummy_block_for_bio_bh_flush;
        u64 last_flush_gen;
        char name[BDEVNAME_SIZE];
};
206
/*
 * Hashtable of btrfsic_block objects: one list head per bucket, the
 * blocks of a bucket are chained via their collision_resolving_node.
 */
struct btrfsic_block_hashtable {
        struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};
210
/*
 * Hashtable of btrfsic_block_link objects: one list head per bucket,
 * the links of a bucket are chained via their collision_resolving_node.
 */
struct btrfsic_block_link_hashtable {
        struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};
214
/*
 * Hashtable of btrfsic_dev_state objects: one list head per bucket,
 * the entries of a bucket are chained via their
 * collision_resolving_node.
 */
struct btrfsic_dev_state_hashtable {
        struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
218
/*
 * Location of a block plus the buffers holding its content. Objects
 * of this type are handed to btrfsic_map_block() as output parameter
 * and to btrfsic_read_block() / btrfsic_release_block_ctx().
 */
struct btrfsic_block_data_ctx {
        u64 start;              /* virtual bytenr */
        u64 dev_bytenr;         /* physical bytenr on device */
        u32 len;
        struct btrfsic_dev_state *dev;
        /*
         * NOTE(review): presumably per-page data pointers, the pages
         * themselves and the allocation backing both arrays -- confirm
         * against btrfsic_read_block()/btrfsic_release_block_ctx().
         */
        char **datav;
        struct page **pagev;
        void *mem_to_free;
};
228
/*
 * This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock(). Frames are
 * linked to their predecessor through the prev pointer.
 */
struct btrfsic_stack_frame {
        u32 magic;      /* presumably BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER
                         * -- TODO confirm */
        u32 nr;
        int error;
        int i;
        int limit_nesting;
        int num_copies;
        int mirror_num;
        struct btrfsic_block *block;
        struct btrfsic_block_data_ctx *block_ctx;
        struct btrfsic_block *next_block;
        struct btrfsic_block_data_ctx next_block_ctx;
        struct btrfs_header *hdr;
        struct btrfsic_stack_frame *prev;
};
246
/*
 * Some state per mounted filesystem: the print mask, the block and
 * block link hashtables, and the most recent superblock seen so far.
 */
struct btrfsic_state {
        u32 print_mask;         /* BTRFSIC_PRINT_MASK_* bits */
        int include_extent_data;
        int csum_size;
        struct list_head all_blocks_list;
        struct btrfsic_block_hashtable block_hashtable;
        struct btrfsic_block_link_hashtable block_link_hashtable;
        struct btrfs_root *root;
        u64 max_superblock_generation;
        struct btrfsic_block *latest_superblock;
        u32 metablock_size;
        u32 datablock_size;
};
261
/*
 * Forward declarations of the file-local helpers.
 */

/* init/alloc/free of block, block link and dev_state objects */
static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
/* hashtable of blocks, keyed by (bdev, dev_bytenr) */
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
                                        struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
                struct block_device *bdev,
                u64 dev_bytenr,
                struct btrfsic_block_hashtable *h);
/* hashtable of block links, keyed by both end points of the link */
static void btrfsic_block_link_hashtable_init(
                struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_add(
                struct btrfsic_block_link *l,
                struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
                struct block_device *bdev_ref_to,
                u64 dev_bytenr_ref_to,
                struct block_device *bdev_ref_from,
                u64 dev_bytenr_ref_from,
                struct btrfsic_block_link_hashtable *h);
/* hashtable of dev_state objects, keyed by bdev */
static void btrfsic_dev_state_hashtable_init(
                struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_add(
                struct btrfsic_dev_state *ds,
                struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
                struct block_device *bdev,
                struct btrfsic_dev_state_hashtable *h);
/* stack frames used by btrfsic_process_metablock() */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
/*
 * Forward declarations of the file-local functions that process
 * superblocks, metadata blocks and written blocks, and of the
 * printing and lookup-or-add helpers they use.
 */
static int btrfsic_process_superblock(struct btrfsic_state *state,
                                      struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
                                     struct btrfsic_block *block,
                                     struct btrfsic_block_data_ctx *block_ctx,
                                     int limit_nesting, int force_iodone_flag);
static void btrfsic_read_from_block_data(
        struct btrfsic_block_data_ctx *block_ctx,
        void *dst, u32 offset, size_t len);
static int btrfsic_create_link_to_next_block(
                struct btrfsic_state *state,
                struct btrfsic_block *block,
                struct btrfsic_block_data_ctx
                *block_ctx, u64 next_bytenr,
                int limit_nesting,
                struct btrfsic_block_data_ctx *next_block_ctx,
                struct btrfsic_block **next_blockp,
                int force_iodone_flag,
                int *num_copiesp, int *mirror_nump,
                struct btrfs_disk_key *disk_key,
                u64 parent_generation);
static int btrfsic_handle_extent_data(struct btrfsic_state *state,
                                      struct btrfsic_block *block,
                                      struct btrfsic_block_data_ctx *block_ctx,
                                      u32 item_offset, int force_iodone_flag);
/* mapping, reading and releasing of block contents */
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
                             struct btrfsic_block_data_ctx *block_ctx_out,
                             int mirror_num);
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
                              struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
                                     char **datav, unsigned int num_pages);
/* handling of written blocks and of the bio/buffer_head completion */
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
                                          u64 dev_bytenr, char **mapped_datav,
                                          unsigned int num_pages,
                                          struct bio *bio, int *bio_is_patched,
                                          struct buffer_head *bh,
                                          int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
                struct btrfsic_state *state,
                struct btrfsic_block *const block,
                struct btrfs_super_block *const super_hdr);
static void btrfsic_bio_end_io(struct bio *bp);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
                                              const struct btrfsic_block *block,
                                              int recursion_level);
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                                        struct btrfsic_block *const block,
                                        int recursion_level);
/* printing helpers */
static void btrfsic_print_add_link(const struct btrfsic_state *state,
                                   const struct btrfsic_block_link *l);
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
                                   const struct btrfsic_block_link *l);
static char btrfsic_get_block_type(const struct btrfsic_state *state,
                                   const struct btrfsic_block *block);
static void btrfsic_dump_tree(const struct btrfsic_state *state);
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
                                  const struct btrfsic_block *block,
                                  int indent_level);
/* lookup-or-add helpers and superblock/device handling */
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
                struct btrfsic_state *state,
                struct btrfsic_block_data_ctx *next_block_ctx,
                struct btrfsic_block *next_block,
                struct btrfsic_block *from_block,
                u64 parent_generation);
static struct btrfsic_block *btrfsic_block_lookup_or_add(
                struct btrfsic_state *state,
                struct btrfsic_block_data_ctx *block_ctx,
                const char *additional_string,
                int is_metadata,
                int is_iodone,
                int never_written,
                int mirror_num,
                int *was_created);
static int btrfsic_process_superblock_dev_mirror(
                struct btrfsic_state *state,
                struct btrfsic_dev_state *dev_state,
                struct btrfs_device *device,
                int superblock_mirror_num,
                struct btrfsic_dev_state **selected_dev_state,
                struct btrfs_super_block *selected_super);
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
                struct block_device *bdev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
                                           u64 bytenr,
                                           struct btrfsic_dev_state *dev_state,
                                           u64 dev_bytenr);
392
/*
 * File-scope state shared by all mounted filesystems.
 * NOTE(review): btrfsic_mutex has no static initializer, so it has to
 * be set up with mutex_init() before its first use; presumably that is
 * what btrfsic_is_initialized guards -- confirm in the mount path.
 */
static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
/* maps a block_device pointer to its btrfsic_dev_state */
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
396
397
398 static void btrfsic_block_init(struct btrfsic_block *b)
399 {
400         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
401         b->dev_state = NULL;
402         b->dev_bytenr = 0;
403         b->logical_bytenr = 0;
404         b->generation = BTRFSIC_GENERATION_UNKNOWN;
405         b->disk_key.objectid = 0;
406         b->disk_key.type = 0;
407         b->disk_key.offset = 0;
408         b->is_metadata = 0;
409         b->is_superblock = 0;
410         b->is_iodone = 0;
411         b->iodone_w_error = 0;
412         b->never_written = 0;
413         b->mirror_num = 0;
414         b->next_in_same_bio = NULL;
415         b->orig_bio_bh_private = NULL;
416         b->orig_bio_bh_end_io.bio = NULL;
417         INIT_LIST_HEAD(&b->collision_resolving_node);
418         INIT_LIST_HEAD(&b->all_blocks_node);
419         INIT_LIST_HEAD(&b->ref_to_list);
420         INIT_LIST_HEAD(&b->ref_from_list);
421         b->submit_bio_bh_rw = 0;
422         b->flush_gen = 0;
423 }
424
425 static struct btrfsic_block *btrfsic_block_alloc(void)
426 {
427         struct btrfsic_block *b;
428
429         b = kzalloc(sizeof(*b), GFP_NOFS);
430         if (NULL != b)
431                 btrfsic_block_init(b);
432
433         return b;
434 }
435
436 static void btrfsic_block_free(struct btrfsic_block *b)
437 {
438         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
439         kfree(b);
440 }
441
442 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
443 {
444         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
445         l->ref_cnt = 1;
446         INIT_LIST_HEAD(&l->node_ref_to);
447         INIT_LIST_HEAD(&l->node_ref_from);
448         INIT_LIST_HEAD(&l->collision_resolving_node);
449         l->block_ref_to = NULL;
450         l->block_ref_from = NULL;
451 }
452
453 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
454 {
455         struct btrfsic_block_link *l;
456
457         l = kzalloc(sizeof(*l), GFP_NOFS);
458         if (NULL != l)
459                 btrfsic_block_link_init(l);
460
461         return l;
462 }
463
464 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
465 {
466         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
467         kfree(l);
468 }
469
470 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
471 {
472         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
473         ds->bdev = NULL;
474         ds->state = NULL;
475         ds->name[0] = '\0';
476         INIT_LIST_HEAD(&ds->collision_resolving_node);
477         ds->last_flush_gen = 0;
478         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
479         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
480         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
481 }
482
483 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
484 {
485         struct btrfsic_dev_state *ds;
486
487         ds = kzalloc(sizeof(*ds), GFP_NOFS);
488         if (NULL != ds)
489                 btrfsic_dev_state_init(ds);
490
491         return ds;
492 }
493
494 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
495 {
496         BUG_ON(!(NULL == ds ||
497                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
498         kfree(ds);
499 }
500
501 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
502 {
503         int i;
504
505         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
506                 INIT_LIST_HEAD(h->table + i);
507 }
508
509 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
510                                         struct btrfsic_block_hashtable *h)
511 {
512         const unsigned int hashval =
513             (((unsigned int)(b->dev_bytenr >> 16)) ^
514              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
515              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
516
517         list_add(&b->collision_resolving_node, h->table + hashval);
518 }
519
520 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
521 {
522         list_del(&b->collision_resolving_node);
523 }
524
525 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
526                 struct block_device *bdev,
527                 u64 dev_bytenr,
528                 struct btrfsic_block_hashtable *h)
529 {
530         const unsigned int hashval =
531             (((unsigned int)(dev_bytenr >> 16)) ^
532              ((unsigned int)((uintptr_t)bdev))) &
533              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
534         struct btrfsic_block *b;
535
536         list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
537                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
538                         return b;
539         }
540
541         return NULL;
542 }
543
544 static void btrfsic_block_link_hashtable_init(
545                 struct btrfsic_block_link_hashtable *h)
546 {
547         int i;
548
549         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
550                 INIT_LIST_HEAD(h->table + i);
551 }
552
553 static void btrfsic_block_link_hashtable_add(
554                 struct btrfsic_block_link *l,
555                 struct btrfsic_block_link_hashtable *h)
556 {
557         const unsigned int hashval =
558             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
559              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
560              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
561              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
562              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
563
564         BUG_ON(NULL == l->block_ref_to);
565         BUG_ON(NULL == l->block_ref_from);
566         list_add(&l->collision_resolving_node, h->table + hashval);
567 }
568
569 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
570 {
571         list_del(&l->collision_resolving_node);
572 }
573
574 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
575                 struct block_device *bdev_ref_to,
576                 u64 dev_bytenr_ref_to,
577                 struct block_device *bdev_ref_from,
578                 u64 dev_bytenr_ref_from,
579                 struct btrfsic_block_link_hashtable *h)
580 {
581         const unsigned int hashval =
582             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
583              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
584              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
585              ((unsigned int)((uintptr_t)bdev_ref_from))) &
586              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
587         struct btrfsic_block_link *l;
588
589         list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
590                 BUG_ON(NULL == l->block_ref_to);
591                 BUG_ON(NULL == l->block_ref_from);
592                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
593                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
594                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
595                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
596                         return l;
597         }
598
599         return NULL;
600 }
601
602 static void btrfsic_dev_state_hashtable_init(
603                 struct btrfsic_dev_state_hashtable *h)
604 {
605         int i;
606
607         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
608                 INIT_LIST_HEAD(h->table + i);
609 }
610
611 static void btrfsic_dev_state_hashtable_add(
612                 struct btrfsic_dev_state *ds,
613                 struct btrfsic_dev_state_hashtable *h)
614 {
615         const unsigned int hashval =
616             (((unsigned int)((uintptr_t)ds->bdev)) &
617              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
618
619         list_add(&ds->collision_resolving_node, h->table + hashval);
620 }
621
622 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
623 {
624         list_del(&ds->collision_resolving_node);
625 }
626
627 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
628                 struct block_device *bdev,
629                 struct btrfsic_dev_state_hashtable *h)
630 {
631         const unsigned int hashval =
632             (((unsigned int)((uintptr_t)bdev)) &
633              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
634         struct btrfsic_dev_state *ds;
635
636         list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
637                 if (ds->bdev == bdev)
638                         return ds;
639         }
640
641         return NULL;
642 }
643
644 static int btrfsic_process_superblock(struct btrfsic_state *state,
645                                       struct btrfs_fs_devices *fs_devices)
646 {
647         int ret = 0;
648         struct btrfs_super_block *selected_super;
649         struct list_head *dev_head = &fs_devices->devices;
650         struct btrfs_device *device;
651         struct btrfsic_dev_state *selected_dev_state = NULL;
652         int pass;
653
654         BUG_ON(NULL == state);
655         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
656         if (NULL == selected_super) {
657                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
658                 return -ENOMEM;
659         }
660
661         list_for_each_entry(device, dev_head, dev_list) {
662                 int i;
663                 struct btrfsic_dev_state *dev_state;
664
665                 if (!device->bdev || !device->name)
666                         continue;
667
668                 dev_state = btrfsic_dev_state_lookup(device->bdev);
669                 BUG_ON(NULL == dev_state);
670                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
671                         ret = btrfsic_process_superblock_dev_mirror(
672                                         state, dev_state, device, i,
673                                         &selected_dev_state, selected_super);
674                         if (0 != ret && 0 == i) {
675                                 kfree(selected_super);
676                                 return ret;
677                         }
678                 }
679         }
680
681         if (NULL == state->latest_superblock) {
682                 printk(KERN_INFO "btrfsic: no superblock found!\n");
683                 kfree(selected_super);
684                 return -1;
685         }
686
687         state->csum_size = btrfs_super_csum_size(selected_super);
688
689         for (pass = 0; pass < 3; pass++) {
690                 int num_copies;
691                 int mirror_num;
692                 u64 next_bytenr;
693
694                 switch (pass) {
695                 case 0:
696                         next_bytenr = btrfs_super_root(selected_super);
697                         if (state->print_mask &
698                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
699                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
700                         break;
701                 case 1:
702                         next_bytenr = btrfs_super_chunk_root(selected_super);
703                         if (state->print_mask &
704                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
705                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
706                         break;
707                 case 2:
708                         next_bytenr = btrfs_super_log_root(selected_super);
709                         if (0 == next_bytenr)
710                                 continue;
711                         if (state->print_mask &
712                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
713                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
714                         break;
715                 }
716
717                 num_copies =
718                     btrfs_num_copies(state->root->fs_info,
719                                      next_bytenr, state->metablock_size);
720                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
721                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
722                                next_bytenr, num_copies);
723
724                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
725                         struct btrfsic_block *next_block;
726                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
727                         struct btrfsic_block_link *l;
728
729                         ret = btrfsic_map_block(state, next_bytenr,
730                                                 state->metablock_size,
731                                                 &tmp_next_block_ctx,
732                                                 mirror_num);
733                         if (ret) {
734                                 printk(KERN_INFO "btrfsic:"
735                                        " btrfsic_map_block(root @%llu,"
736                                        " mirror %d) failed!\n",
737                                        next_bytenr, mirror_num);
738                                 kfree(selected_super);
739                                 return -1;
740                         }
741
742                         next_block = btrfsic_block_hashtable_lookup(
743                                         tmp_next_block_ctx.dev->bdev,
744                                         tmp_next_block_ctx.dev_bytenr,
745                                         &state->block_hashtable);
746                         BUG_ON(NULL == next_block);
747
748                         l = btrfsic_block_link_hashtable_lookup(
749                                         tmp_next_block_ctx.dev->bdev,
750                                         tmp_next_block_ctx.dev_bytenr,
751                                         state->latest_superblock->dev_state->
752                                         bdev,
753                                         state->latest_superblock->dev_bytenr,
754                                         &state->block_link_hashtable);
755                         BUG_ON(NULL == l);
756
757                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
758                         if (ret < (int)PAGE_CACHE_SIZE) {
759                                 printk(KERN_INFO
760                                        "btrfsic: read @logical %llu failed!\n",
761                                        tmp_next_block_ctx.start);
762                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
763                                 kfree(selected_super);
764                                 return -1;
765                         }
766
767                         ret = btrfsic_process_metablock(state,
768                                                         next_block,
769                                                         &tmp_next_block_ctx,
770                                                         BTRFS_MAX_LEVEL + 3, 1);
771                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
772                 }
773         }
774
775         kfree(selected_super);
776         return ret;
777 }
778
779 static int btrfsic_process_superblock_dev_mirror(
780                 struct btrfsic_state *state,
781                 struct btrfsic_dev_state *dev_state,
782                 struct btrfs_device *device,
783                 int superblock_mirror_num,
784                 struct btrfsic_dev_state **selected_dev_state,
785                 struct btrfs_super_block *selected_super)
786 {
787         struct btrfs_super_block *super_tmp;
788         u64 dev_bytenr;
789         struct buffer_head *bh;
790         struct btrfsic_block *superblock_tmp;
791         int pass;
792         struct block_device *const superblock_bdev = device->bdev;
793
794         /* super block bytenr is always the unmapped device bytenr */
795         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
796         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
797                 return -1;
798         bh = __bread(superblock_bdev, dev_bytenr / 4096,
799                      BTRFS_SUPER_INFO_SIZE);
800         if (NULL == bh)
801                 return -1;
802         super_tmp = (struct btrfs_super_block *)
803             (bh->b_data + (dev_bytenr & 4095));
804
805         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
806             btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
807             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
808             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
809             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
810                 brelse(bh);
811                 return 0;
812         }
813
814         superblock_tmp =
815             btrfsic_block_hashtable_lookup(superblock_bdev,
816                                            dev_bytenr,
817                                            &state->block_hashtable);
818         if (NULL == superblock_tmp) {
819                 superblock_tmp = btrfsic_block_alloc();
820                 if (NULL == superblock_tmp) {
821                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
822                         brelse(bh);
823                         return -1;
824                 }
825                 /* for superblock, only the dev_bytenr makes sense */
826                 superblock_tmp->dev_bytenr = dev_bytenr;
827                 superblock_tmp->dev_state = dev_state;
828                 superblock_tmp->logical_bytenr = dev_bytenr;
829                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
830                 superblock_tmp->is_metadata = 1;
831                 superblock_tmp->is_superblock = 1;
832                 superblock_tmp->is_iodone = 1;
833                 superblock_tmp->never_written = 0;
834                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
835                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
836                         btrfs_info_in_rcu(device->dev_root->fs_info,
837                                 "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
838                                      superblock_bdev,
839                                      rcu_str_deref(device->name), dev_bytenr,
840                                      dev_state->name, dev_bytenr,
841                                      superblock_mirror_num);
842                 list_add(&superblock_tmp->all_blocks_node,
843                          &state->all_blocks_list);
844                 btrfsic_block_hashtable_add(superblock_tmp,
845                                             &state->block_hashtable);
846         }
847
848         /* select the one with the highest generation field */
849         if (btrfs_super_generation(super_tmp) >
850             state->max_superblock_generation ||
851             0 == state->max_superblock_generation) {
852                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
853                 *selected_dev_state = dev_state;
854                 state->max_superblock_generation =
855                     btrfs_super_generation(super_tmp);
856                 state->latest_superblock = superblock_tmp;
857         }
858
859         for (pass = 0; pass < 3; pass++) {
860                 u64 next_bytenr;
861                 int num_copies;
862                 int mirror_num;
863                 const char *additional_string = NULL;
864                 struct btrfs_disk_key tmp_disk_key;
865
866                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
867                 tmp_disk_key.offset = 0;
868                 switch (pass) {
869                 case 0:
870                         btrfs_set_disk_key_objectid(&tmp_disk_key,
871                                                     BTRFS_ROOT_TREE_OBJECTID);
872                         additional_string = "initial root ";
873                         next_bytenr = btrfs_super_root(super_tmp);
874                         break;
875                 case 1:
876                         btrfs_set_disk_key_objectid(&tmp_disk_key,
877                                                     BTRFS_CHUNK_TREE_OBJECTID);
878                         additional_string = "initial chunk ";
879                         next_bytenr = btrfs_super_chunk_root(super_tmp);
880                         break;
881                 case 2:
882                         btrfs_set_disk_key_objectid(&tmp_disk_key,
883                                                     BTRFS_TREE_LOG_OBJECTID);
884                         additional_string = "initial log ";
885                         next_bytenr = btrfs_super_log_root(super_tmp);
886                         if (0 == next_bytenr)
887                                 continue;
888                         break;
889                 }
890
891                 num_copies =
892                     btrfs_num_copies(state->root->fs_info,
893                                      next_bytenr, state->metablock_size);
894                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
895                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
896                                next_bytenr, num_copies);
897                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
898                         struct btrfsic_block *next_block;
899                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
900                         struct btrfsic_block_link *l;
901
902                         if (btrfsic_map_block(state, next_bytenr,
903                                               state->metablock_size,
904                                               &tmp_next_block_ctx,
905                                               mirror_num)) {
906                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
907                                        "bytenr @%llu, mirror %d) failed!\n",
908                                        next_bytenr, mirror_num);
909                                 brelse(bh);
910                                 return -1;
911                         }
912
913                         next_block = btrfsic_block_lookup_or_add(
914                                         state, &tmp_next_block_ctx,
915                                         additional_string, 1, 1, 0,
916                                         mirror_num, NULL);
917                         if (NULL == next_block) {
918                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
919                                 brelse(bh);
920                                 return -1;
921                         }
922
923                         next_block->disk_key = tmp_disk_key;
924                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
925                         l = btrfsic_block_link_lookup_or_add(
926                                         state, &tmp_next_block_ctx,
927                                         next_block, superblock_tmp,
928                                         BTRFSIC_GENERATION_UNKNOWN);
929                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
930                         if (NULL == l) {
931                                 brelse(bh);
932                                 return -1;
933                         }
934                 }
935         }
936         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
937                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
938
939         brelse(bh);
940         return 0;
941 }
942
943 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
944 {
945         struct btrfsic_stack_frame *sf;
946
947         sf = kzalloc(sizeof(*sf), GFP_NOFS);
948         if (NULL == sf)
949                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
950         else
951                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
952         return sf;
953 }
954
955 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
956 {
957         BUG_ON(!(NULL == sf ||
958                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
959         kfree(sf);
960 }
961
962 static int btrfsic_process_metablock(
963                 struct btrfsic_state *state,
964                 struct btrfsic_block *const first_block,
965                 struct btrfsic_block_data_ctx *const first_block_ctx,
966                 int first_limit_nesting, int force_iodone_flag)
967 {
968         struct btrfsic_stack_frame initial_stack_frame = { 0 };
969         struct btrfsic_stack_frame *sf;
970         struct btrfsic_stack_frame *next_stack;
971         struct btrfs_header *const first_hdr =
972                 (struct btrfs_header *)first_block_ctx->datav[0];
973
974         BUG_ON(!first_hdr);
975         sf = &initial_stack_frame;
976         sf->error = 0;
977         sf->i = -1;
978         sf->limit_nesting = first_limit_nesting;
979         sf->block = first_block;
980         sf->block_ctx = first_block_ctx;
981         sf->next_block = NULL;
982         sf->hdr = first_hdr;
983         sf->prev = NULL;
984
985 continue_with_new_stack_frame:
986         sf->block->generation = le64_to_cpu(sf->hdr->generation);
987         if (0 == sf->hdr->level) {
988                 struct btrfs_leaf *const leafhdr =
989                     (struct btrfs_leaf *)sf->hdr;
990
991                 if (-1 == sf->i) {
992                         sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
993
994                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
995                                 printk(KERN_INFO
996                                        "leaf %llu items %d generation %llu"
997                                        " owner %llu\n",
998                                        sf->block_ctx->start, sf->nr,
999                                        btrfs_stack_header_generation(
1000                                                &leafhdr->header),
1001                                        btrfs_stack_header_owner(
1002                                                &leafhdr->header));
1003                 }
1004
1005 continue_with_current_leaf_stack_frame:
1006                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1007                         sf->i++;
1008                         sf->num_copies = 0;
1009                 }
1010
1011                 if (sf->i < sf->nr) {
1012                         struct btrfs_item disk_item;
1013                         u32 disk_item_offset =
1014                                 (uintptr_t)(leafhdr->items + sf->i) -
1015                                 (uintptr_t)leafhdr;
1016                         struct btrfs_disk_key *disk_key;
1017                         u8 type;
1018                         u32 item_offset;
1019                         u32 item_size;
1020
1021                         if (disk_item_offset + sizeof(struct btrfs_item) >
1022                             sf->block_ctx->len) {
1023 leaf_item_out_of_bounce_error:
1024                                 printk(KERN_INFO
1025                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1026                                        sf->block_ctx->start,
1027                                        sf->block_ctx->dev->name);
1028                                 goto one_stack_frame_backwards;
1029                         }
1030                         btrfsic_read_from_block_data(sf->block_ctx,
1031                                                      &disk_item,
1032                                                      disk_item_offset,
1033                                                      sizeof(struct btrfs_item));
1034                         item_offset = btrfs_stack_item_offset(&disk_item);
1035                         item_size = btrfs_stack_item_size(&disk_item);
1036                         disk_key = &disk_item.key;
1037                         type = btrfs_disk_key_type(disk_key);
1038
1039                         if (BTRFS_ROOT_ITEM_KEY == type) {
1040                                 struct btrfs_root_item root_item;
1041                                 u32 root_item_offset;
1042                                 u64 next_bytenr;
1043
1044                                 root_item_offset = item_offset +
1045                                         offsetof(struct btrfs_leaf, items);
1046                                 if (root_item_offset + item_size >
1047                                     sf->block_ctx->len)
1048                                         goto leaf_item_out_of_bounce_error;
1049                                 btrfsic_read_from_block_data(
1050                                         sf->block_ctx, &root_item,
1051                                         root_item_offset,
1052                                         item_size);
1053                                 next_bytenr = btrfs_root_bytenr(&root_item);
1054
1055                                 sf->error =
1056                                     btrfsic_create_link_to_next_block(
1057                                                 state,
1058                                                 sf->block,
1059                                                 sf->block_ctx,
1060                                                 next_bytenr,
1061                                                 sf->limit_nesting,
1062                                                 &sf->next_block_ctx,
1063                                                 &sf->next_block,
1064                                                 force_iodone_flag,
1065                                                 &sf->num_copies,
1066                                                 &sf->mirror_num,
1067                                                 disk_key,
1068                                                 btrfs_root_generation(
1069                                                 &root_item));
1070                                 if (sf->error)
1071                                         goto one_stack_frame_backwards;
1072
1073                                 if (NULL != sf->next_block) {
1074                                         struct btrfs_header *const next_hdr =
1075                                             (struct btrfs_header *)
1076                                             sf->next_block_ctx.datav[0];
1077
1078                                         next_stack =
1079                                             btrfsic_stack_frame_alloc();
1080                                         if (NULL == next_stack) {
1081                                                 sf->error = -1;
1082                                                 btrfsic_release_block_ctx(
1083                                                                 &sf->
1084                                                                 next_block_ctx);
1085                                                 goto one_stack_frame_backwards;
1086                                         }
1087
1088                                         next_stack->i = -1;
1089                                         next_stack->block = sf->next_block;
1090                                         next_stack->block_ctx =
1091                                             &sf->next_block_ctx;
1092                                         next_stack->next_block = NULL;
1093                                         next_stack->hdr = next_hdr;
1094                                         next_stack->limit_nesting =
1095                                             sf->limit_nesting - 1;
1096                                         next_stack->prev = sf;
1097                                         sf = next_stack;
1098                                         goto continue_with_new_stack_frame;
1099                                 }
1100                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1101                                    state->include_extent_data) {
1102                                 sf->error = btrfsic_handle_extent_data(
1103                                                 state,
1104                                                 sf->block,
1105                                                 sf->block_ctx,
1106                                                 item_offset,
1107                                                 force_iodone_flag);
1108                                 if (sf->error)
1109                                         goto one_stack_frame_backwards;
1110                         }
1111
1112                         goto continue_with_current_leaf_stack_frame;
1113                 }
1114         } else {
1115                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1116
1117                 if (-1 == sf->i) {
1118                         sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1119
1120                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1121                                 printk(KERN_INFO "node %llu level %d items %d"
1122                                        " generation %llu owner %llu\n",
1123                                        sf->block_ctx->start,
1124                                        nodehdr->header.level, sf->nr,
1125                                        btrfs_stack_header_generation(
1126                                        &nodehdr->header),
1127                                        btrfs_stack_header_owner(
1128                                        &nodehdr->header));
1129                 }
1130
1131 continue_with_current_node_stack_frame:
1132                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1133                         sf->i++;
1134                         sf->num_copies = 0;
1135                 }
1136
1137                 if (sf->i < sf->nr) {
1138                         struct btrfs_key_ptr key_ptr;
1139                         u32 key_ptr_offset;
1140                         u64 next_bytenr;
1141
1142                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1143                                           (uintptr_t)nodehdr;
1144                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1145                             sf->block_ctx->len) {
1146                                 printk(KERN_INFO
1147                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1148                                        sf->block_ctx->start,
1149                                        sf->block_ctx->dev->name);
1150                                 goto one_stack_frame_backwards;
1151                         }
1152                         btrfsic_read_from_block_data(
1153                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1154                                 sizeof(struct btrfs_key_ptr));
1155                         next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1156
1157                         sf->error = btrfsic_create_link_to_next_block(
1158                                         state,
1159                                         sf->block,
1160                                         sf->block_ctx,
1161                                         next_bytenr,
1162                                         sf->limit_nesting,
1163                                         &sf->next_block_ctx,
1164                                         &sf->next_block,
1165                                         force_iodone_flag,
1166                                         &sf->num_copies,
1167                                         &sf->mirror_num,
1168                                         &key_ptr.key,
1169                                         btrfs_stack_key_generation(&key_ptr));
1170                         if (sf->error)
1171                                 goto one_stack_frame_backwards;
1172
1173                         if (NULL != sf->next_block) {
1174                                 struct btrfs_header *const next_hdr =
1175                                     (struct btrfs_header *)
1176                                     sf->next_block_ctx.datav[0];
1177
1178                                 next_stack = btrfsic_stack_frame_alloc();
1179                                 if (NULL == next_stack) {
1180                                         sf->error = -1;
1181                                         goto one_stack_frame_backwards;
1182                                 }
1183
1184                                 next_stack->i = -1;
1185                                 next_stack->block = sf->next_block;
1186                                 next_stack->block_ctx = &sf->next_block_ctx;
1187                                 next_stack->next_block = NULL;
1188                                 next_stack->hdr = next_hdr;
1189                                 next_stack->limit_nesting =
1190                                     sf->limit_nesting - 1;
1191                                 next_stack->prev = sf;
1192                                 sf = next_stack;
1193                                 goto continue_with_new_stack_frame;
1194                         }
1195
1196                         goto continue_with_current_node_stack_frame;
1197                 }
1198         }
1199
1200 one_stack_frame_backwards:
1201         if (NULL != sf->prev) {
1202                 struct btrfsic_stack_frame *const prev = sf->prev;
1203
1204                 /* the one for the initial block is freed in the caller */
1205                 btrfsic_release_block_ctx(sf->block_ctx);
1206
1207                 if (sf->error) {
1208                         prev->error = sf->error;
1209                         btrfsic_stack_frame_free(sf);
1210                         sf = prev;
1211                         goto one_stack_frame_backwards;
1212                 }
1213
1214                 btrfsic_stack_frame_free(sf);
1215                 sf = prev;
1216                 goto continue_with_new_stack_frame;
1217         } else {
1218                 BUG_ON(&initial_stack_frame != sf);
1219         }
1220
1221         return sf->error;
1222 }
1223
1224 static void btrfsic_read_from_block_data(
1225         struct btrfsic_block_data_ctx *block_ctx,
1226         void *dstv, u32 offset, size_t len)
1227 {
1228         size_t cur;
1229         size_t offset_in_page;
1230         char *kaddr;
1231         char *dst = (char *)dstv;
1232         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1233         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1234
1235         WARN_ON(offset + len > block_ctx->len);
1236         offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
1237
1238         while (len > 0) {
1239                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1240                 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
1241                 kaddr = block_ctx->datav[i];
1242                 memcpy(dst, kaddr + offset_in_page, cur);
1243
1244                 dst += cur;
1245                 len -= cur;
1246                 offset_in_page = 0;
1247                 i++;
1248         }
1249 }
1250
1251 static int btrfsic_create_link_to_next_block(
1252                 struct btrfsic_state *state,
1253                 struct btrfsic_block *block,
1254                 struct btrfsic_block_data_ctx *block_ctx,
1255                 u64 next_bytenr,
1256                 int limit_nesting,
1257                 struct btrfsic_block_data_ctx *next_block_ctx,
1258                 struct btrfsic_block **next_blockp,
1259                 int force_iodone_flag,
1260                 int *num_copiesp, int *mirror_nump,
1261                 struct btrfs_disk_key *disk_key,
1262                 u64 parent_generation)
1263 {
1264         struct btrfsic_block *next_block = NULL;
1265         int ret;
1266         struct btrfsic_block_link *l;
1267         int did_alloc_block_link;
1268         int block_was_created;
1269
1270         *next_blockp = NULL;
1271         if (0 == *num_copiesp) {
1272                 *num_copiesp =
1273                     btrfs_num_copies(state->root->fs_info,
1274                                      next_bytenr, state->metablock_size);
1275                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1276                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1277                                next_bytenr, *num_copiesp);
1278                 *mirror_nump = 1;
1279         }
1280
1281         if (*mirror_nump > *num_copiesp)
1282                 return 0;
1283
1284         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1285                 printk(KERN_INFO
1286                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1287                        *mirror_nump);
1288         ret = btrfsic_map_block(state, next_bytenr,
1289                                 state->metablock_size,
1290                                 next_block_ctx, *mirror_nump);
1291         if (ret) {
1292                 printk(KERN_INFO
1293                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1294                        next_bytenr, *mirror_nump);
1295                 btrfsic_release_block_ctx(next_block_ctx);
1296                 *next_blockp = NULL;
1297                 return -1;
1298         }
1299
1300         next_block = btrfsic_block_lookup_or_add(state,
1301                                                  next_block_ctx, "referenced ",
1302                                                  1, force_iodone_flag,
1303                                                  !force_iodone_flag,
1304                                                  *mirror_nump,
1305                                                  &block_was_created);
1306         if (NULL == next_block) {
1307                 btrfsic_release_block_ctx(next_block_ctx);
1308                 *next_blockp = NULL;
1309                 return -1;
1310         }
1311         if (block_was_created) {
1312                 l = NULL;
1313                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1314         } else {
1315                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1316                         if (next_block->logical_bytenr != next_bytenr &&
1317                             !(!next_block->is_metadata &&
1318                               0 == next_block->logical_bytenr))
1319                                 printk(KERN_INFO
1320                                        "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1321                                        next_bytenr, next_block_ctx->dev->name,
1322                                        next_block_ctx->dev_bytenr, *mirror_nump,
1323                                        btrfsic_get_block_type(state,
1324                                                               next_block),
1325                                        next_block->logical_bytenr);
1326                         else
1327                                 printk(KERN_INFO
1328                                        "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1329                                        next_bytenr, next_block_ctx->dev->name,
1330                                        next_block_ctx->dev_bytenr, *mirror_nump,
1331                                        btrfsic_get_block_type(state,
1332                                                               next_block));
1333                 }
1334                 next_block->logical_bytenr = next_bytenr;
1335
1336                 next_block->mirror_num = *mirror_nump;
1337                 l = btrfsic_block_link_hashtable_lookup(
1338                                 next_block_ctx->dev->bdev,
1339                                 next_block_ctx->dev_bytenr,
1340                                 block_ctx->dev->bdev,
1341                                 block_ctx->dev_bytenr,
1342                                 &state->block_link_hashtable);
1343         }
1344
1345         next_block->disk_key = *disk_key;
1346         if (NULL == l) {
1347                 l = btrfsic_block_link_alloc();
1348                 if (NULL == l) {
1349                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1350                         btrfsic_release_block_ctx(next_block_ctx);
1351                         *next_blockp = NULL;
1352                         return -1;
1353                 }
1354
1355                 did_alloc_block_link = 1;
1356                 l->block_ref_to = next_block;
1357                 l->block_ref_from = block;
1358                 l->ref_cnt = 1;
1359                 l->parent_generation = parent_generation;
1360
1361                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1362                         btrfsic_print_add_link(state, l);
1363
1364                 list_add(&l->node_ref_to, &block->ref_to_list);
1365                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1366
1367                 btrfsic_block_link_hashtable_add(l,
1368                                                  &state->block_link_hashtable);
1369         } else {
1370                 did_alloc_block_link = 0;
1371                 if (0 == limit_nesting) {
1372                         l->ref_cnt++;
1373                         l->parent_generation = parent_generation;
1374                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1375                                 btrfsic_print_add_link(state, l);
1376                 }
1377         }
1378
1379         if (limit_nesting > 0 && did_alloc_block_link) {
1380                 ret = btrfsic_read_block(state, next_block_ctx);
1381                 if (ret < (int)next_block_ctx->len) {
1382                         printk(KERN_INFO
1383                                "btrfsic: read block @logical %llu failed!\n",
1384                                next_bytenr);
1385                         btrfsic_release_block_ctx(next_block_ctx);
1386                         *next_blockp = NULL;
1387                         return -1;
1388                 }
1389
1390                 *next_blockp = next_block;
1391         } else {
1392                 *next_blockp = NULL;
1393         }
1394         (*mirror_nump)++;
1395
1396         return 0;
1397 }
1398
1399 static int btrfsic_handle_extent_data(
1400                 struct btrfsic_state *state,
1401                 struct btrfsic_block *block,
1402                 struct btrfsic_block_data_ctx *block_ctx,
1403                 u32 item_offset, int force_iodone_flag)
1404 {
1405         int ret;
1406         struct btrfs_file_extent_item file_extent_item;
1407         u64 file_extent_item_offset;
1408         u64 next_bytenr;
1409         u64 num_bytes;
1410         u64 generation;
1411         struct btrfsic_block_link *l;
1412
1413         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1414                                   item_offset;
1415         if (file_extent_item_offset +
1416             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1417             block_ctx->len) {
1418                 printk(KERN_INFO
1419                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1420                        block_ctx->start, block_ctx->dev->name);
1421                 return -1;
1422         }
1423
1424         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1425                 file_extent_item_offset,
1426                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1427         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1428             btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1429                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1430                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1431                                file_extent_item.type,
1432                                btrfs_stack_file_extent_disk_bytenr(
1433                                &file_extent_item));
1434                 return 0;
1435         }
1436
1437         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1438             block_ctx->len) {
1439                 printk(KERN_INFO
1440                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1441                        block_ctx->start, block_ctx->dev->name);
1442                 return -1;
1443         }
1444         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1445                                      file_extent_item_offset,
1446                                      sizeof(struct btrfs_file_extent_item));
1447         next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
1448         if (btrfs_stack_file_extent_compression(&file_extent_item) ==
1449             BTRFS_COMPRESS_NONE) {
1450                 next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
1451                 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1452         } else {
1453                 num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
1454         }
1455         generation = btrfs_stack_file_extent_generation(&file_extent_item);
1456
1457         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1458                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1459                        " offset = %llu, num_bytes = %llu\n",
1460                        file_extent_item.type,
1461                        btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1462                        btrfs_stack_file_extent_offset(&file_extent_item),
1463                        num_bytes);
1464         while (num_bytes > 0) {
1465                 u32 chunk_len;
1466                 int num_copies;
1467                 int mirror_num;
1468
1469                 if (num_bytes > state->datablock_size)
1470                         chunk_len = state->datablock_size;
1471                 else
1472                         chunk_len = num_bytes;
1473
1474                 num_copies =
1475                     btrfs_num_copies(state->root->fs_info,
1476                                      next_bytenr, state->datablock_size);
1477                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1478                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1479                                next_bytenr, num_copies);
1480                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1481                         struct btrfsic_block_data_ctx next_block_ctx;
1482                         struct btrfsic_block *next_block;
1483                         int block_was_created;
1484
1485                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1486                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1487                                        "mirror_num=%d)\n", mirror_num);
1488                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1489                                 printk(KERN_INFO
1490                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1491                                        next_bytenr, chunk_len);
1492                         ret = btrfsic_map_block(state, next_bytenr,
1493                                                 chunk_len, &next_block_ctx,
1494                                                 mirror_num);
1495                         if (ret) {
1496                                 printk(KERN_INFO
1497                                        "btrfsic: btrfsic_map_block(@%llu,"
1498                                        " mirror=%d) failed!\n",
1499                                        next_bytenr, mirror_num);
1500                                 return -1;
1501                         }
1502
1503                         next_block = btrfsic_block_lookup_or_add(
1504                                         state,
1505                                         &next_block_ctx,
1506                                         "referenced ",
1507                                         0,
1508                                         force_iodone_flag,
1509                                         !force_iodone_flag,
1510                                         mirror_num,
1511                                         &block_was_created);
1512                         if (NULL == next_block) {
1513                                 printk(KERN_INFO
1514                                        "btrfsic: error, kmalloc failed!\n");
1515                                 btrfsic_release_block_ctx(&next_block_ctx);
1516                                 return -1;
1517                         }
1518                         if (!block_was_created) {
1519                                 if ((state->print_mask &
1520                                      BTRFSIC_PRINT_MASK_VERBOSE) &&
1521                                     next_block->logical_bytenr != next_bytenr &&
1522                                     !(!next_block->is_metadata &&
1523                                       0 == next_block->logical_bytenr)) {
1524                                         printk(KERN_INFO
1525                                                "Referenced block"
1526                                                " @%llu (%s/%llu/%d)"
1527                                                " found in hash table, D,"
1528                                                " bytenr mismatch"
1529                                                " (!= stored %llu).\n",
1530                                                next_bytenr,
1531                                                next_block_ctx.dev->name,
1532                                                next_block_ctx.dev_bytenr,
1533                                                mirror_num,
1534                                                next_block->logical_bytenr);
1535                                 }
1536                                 next_block->logical_bytenr = next_bytenr;
1537                                 next_block->mirror_num = mirror_num;
1538                         }
1539
1540                         l = btrfsic_block_link_lookup_or_add(state,
1541                                                              &next_block_ctx,
1542                                                              next_block, block,
1543                                                              generation);
1544                         btrfsic_release_block_ctx(&next_block_ctx);
1545                         if (NULL == l)
1546                                 return -1;
1547                 }
1548
1549                 next_bytenr += chunk_len;
1550                 num_bytes -= chunk_len;
1551         }
1552
1553         return 0;
1554 }
1555
1556 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1557                              struct btrfsic_block_data_ctx *block_ctx_out,
1558                              int mirror_num)
1559 {
1560         int ret;
1561         u64 length;
1562         struct btrfs_bio *multi = NULL;
1563         struct btrfs_device *device;
1564
1565         length = len;
1566         ret = btrfs_map_block(state->root->fs_info, READ,
1567                               bytenr, &length, &multi, mirror_num);
1568
1569         if (ret) {
1570                 block_ctx_out->start = 0;
1571                 block_ctx_out->dev_bytenr = 0;
1572                 block_ctx_out->len = 0;
1573                 block_ctx_out->dev = NULL;
1574                 block_ctx_out->datav = NULL;
1575                 block_ctx_out->pagev = NULL;
1576                 block_ctx_out->mem_to_free = NULL;
1577
1578                 return ret;
1579         }
1580
1581         device = multi->stripes[0].dev;
1582         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1583         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1584         block_ctx_out->start = bytenr;
1585         block_ctx_out->len = len;
1586         block_ctx_out->datav = NULL;
1587         block_ctx_out->pagev = NULL;
1588         block_ctx_out->mem_to_free = NULL;
1589
1590         kfree(multi);
1591         if (NULL == block_ctx_out->dev) {
1592                 ret = -ENXIO;
1593                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1594         }
1595
1596         return ret;
1597 }
1598
1599 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1600 {
1601         if (block_ctx->mem_to_free) {
1602                 unsigned int num_pages;
1603
1604                 BUG_ON(!block_ctx->datav);
1605                 BUG_ON(!block_ctx->pagev);
1606                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1607                             PAGE_CACHE_SHIFT;
1608                 while (num_pages > 0) {
1609                         num_pages--;
1610                         if (block_ctx->datav[num_pages]) {
1611                                 kunmap(block_ctx->pagev[num_pages]);
1612                                 block_ctx->datav[num_pages] = NULL;
1613                         }
1614                         if (block_ctx->pagev[num_pages]) {
1615                                 __free_page(block_ctx->pagev[num_pages]);
1616                                 block_ctx->pagev[num_pages] = NULL;
1617                         }
1618                 }
1619
1620                 kfree(block_ctx->mem_to_free);
1621                 block_ctx->mem_to_free = NULL;
1622                 block_ctx->pagev = NULL;
1623                 block_ctx->datav = NULL;
1624         }
1625 }
1626
1627 static int btrfsic_read_block(struct btrfsic_state *state,
1628                               struct btrfsic_block_data_ctx *block_ctx)
1629 {
1630         unsigned int num_pages;
1631         unsigned int i;
1632         u64 dev_bytenr;
1633         int ret;
1634
1635         BUG_ON(block_ctx->datav);
1636         BUG_ON(block_ctx->pagev);
1637         BUG_ON(block_ctx->mem_to_free);
1638         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1639                 printk(KERN_INFO
1640                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1641                        block_ctx->dev_bytenr);
1642                 return -1;
1643         }
1644
1645         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1646                     PAGE_CACHE_SHIFT;
1647         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1648                                           sizeof(*block_ctx->pagev)) *
1649                                          num_pages, GFP_NOFS);
1650         if (!block_ctx->mem_to_free)
1651                 return -ENOMEM;
1652         block_ctx->datav = block_ctx->mem_to_free;
1653         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1654         for (i = 0; i < num_pages; i++) {
1655                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1656                 if (!block_ctx->pagev[i])
1657                         return -1;
1658         }
1659
1660         dev_bytenr = block_ctx->dev_bytenr;
1661         for (i = 0; i < num_pages;) {
1662                 struct bio *bio;
1663                 unsigned int j;
1664
1665                 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
1666                 if (!bio) {
1667                         printk(KERN_INFO
1668                                "btrfsic: bio_alloc() for %u pages failed!\n",
1669                                num_pages - i);
1670                         return -1;
1671                 }
1672                 bio->bi_bdev = block_ctx->dev->bdev;
1673                 bio->bi_iter.bi_sector = dev_bytenr >> 9;
1674
1675                 for (j = i; j < num_pages; j++) {
1676                         ret = bio_add_page(bio, block_ctx->pagev[j],
1677                                            PAGE_CACHE_SIZE, 0);
1678                         if (PAGE_CACHE_SIZE != ret)
1679                                 break;
1680                 }
1681                 if (j == i) {
1682                         printk(KERN_INFO
1683                                "btrfsic: error, failed to add a single page!\n");
1684                         return -1;
1685                 }
1686                 if (submit_bio_wait(READ, bio)) {
1687                         printk(KERN_INFO
1688                                "btrfsic: read error at logical %llu dev %s!\n",
1689                                block_ctx->start, block_ctx->dev->name);
1690                         bio_put(bio);
1691                         return -1;
1692                 }
1693                 bio_put(bio);
1694                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1695                 i = j;
1696         }
1697         for (i = 0; i < num_pages; i++) {
1698                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1699                 if (!block_ctx->datav[i]) {
1700                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1701                                block_ctx->dev->name);
1702                         return -1;
1703                 }
1704         }
1705
1706         return block_ctx->len;
1707 }
1708
1709 static void btrfsic_dump_database(struct btrfsic_state *state)
1710 {
1711         const struct btrfsic_block *b_all;
1712
1713         BUG_ON(NULL == state);
1714
1715         printk(KERN_INFO "all_blocks_list:\n");
1716         list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
1717                 const struct btrfsic_block_link *l;
1718
1719                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1720                        btrfsic_get_block_type(state, b_all),
1721                        b_all->logical_bytenr, b_all->dev_state->name,
1722                        b_all->dev_bytenr, b_all->mirror_num);
1723
1724                 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
1725                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1726                                " refers %u* to"
1727                                " %c @%llu (%s/%llu/%d)\n",
1728                                btrfsic_get_block_type(state, b_all),
1729                                b_all->logical_bytenr, b_all->dev_state->name,
1730                                b_all->dev_bytenr, b_all->mirror_num,
1731                                l->ref_cnt,
1732                                btrfsic_get_block_type(state, l->block_ref_to),
1733                                l->block_ref_to->logical_bytenr,
1734                                l->block_ref_to->dev_state->name,
1735                                l->block_ref_to->dev_bytenr,
1736                                l->block_ref_to->mirror_num);
1737                 }
1738
1739                 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
1740                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1741                                " is ref %u* from"
1742                                " %c @%llu (%s/%llu/%d)\n",
1743                                btrfsic_get_block_type(state, b_all),
1744                                b_all->logical_bytenr, b_all->dev_state->name,
1745                                b_all->dev_bytenr, b_all->mirror_num,
1746                                l->ref_cnt,
1747                                btrfsic_get_block_type(state, l->block_ref_from),
1748                                l->block_ref_from->logical_bytenr,
1749                                l->block_ref_from->dev_state->name,
1750                                l->block_ref_from->dev_bytenr,
1751                                l->block_ref_from->mirror_num);
1752                 }
1753
1754                 printk(KERN_INFO "\n");
1755         }
1756 }
1757
1758 /*
1759  * Test whether the disk block contains a tree block (leaf or node)
1760  * (note that this test fails for the super block)
1761  */
1762 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1763                                      char **datav, unsigned int num_pages)
1764 {
1765         struct btrfs_header *h;
1766         u8 csum[BTRFS_CSUM_SIZE];
1767         u32 crc = ~(u32)0;
1768         unsigned int i;
1769
1770         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1771                 return 1; /* not metadata */
1772         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1773         h = (struct btrfs_header *)datav[0];
1774
1775         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1776                 return 1;
1777
1778         for (i = 0; i < num_pages; i++) {
1779                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1780                 size_t sublen = i ? PAGE_CACHE_SIZE :
1781                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1782
1783                 crc = btrfs_crc32c(crc, data, sublen);
1784         }
1785         btrfs_csum_final(crc, csum);
1786         if (memcmp(csum, h->csum, state->csum_size))
1787                 return 1;
1788
1789         return 0; /* is metadata */
1790 }
1791
1792 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1793                                           u64 dev_bytenr, char **mapped_datav,
1794                                           unsigned int num_pages,
1795                                           struct bio *bio, int *bio_is_patched,
1796                                           struct buffer_head *bh,
1797                                           int submit_bio_bh_rw)
1798 {
1799         int is_metadata;
1800         struct btrfsic_block *block;
1801         struct btrfsic_block_data_ctx block_ctx;
1802         int ret;
1803         struct btrfsic_state *state = dev_state->state;
1804         struct block_device *bdev = dev_state->bdev;
1805         unsigned int processed_len;
1806
1807         if (NULL != bio_is_patched)
1808                 *bio_is_patched = 0;
1809
1810 again:
1811         if (num_pages == 0)
1812                 return;
1813
1814         processed_len = 0;
1815         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1816                                                       num_pages));
1817
1818         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1819                                                &state->block_hashtable);
1820         if (NULL != block) {
1821                 u64 bytenr = 0;
1822                 struct btrfsic_block_link *l, *tmp;
1823
1824                 if (block->is_superblock) {
1825                         bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1826                                                     mapped_datav[0]);
1827                         if (num_pages * PAGE_CACHE_SIZE <
1828                             BTRFS_SUPER_INFO_SIZE) {
1829                                 printk(KERN_INFO
1830                                        "btrfsic: cannot work with too short bios!\n");
1831                                 return;
1832                         }
1833                         is_metadata = 1;
1834                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1835                         processed_len = BTRFS_SUPER_INFO_SIZE;
1836                         if (state->print_mask &
1837                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1838                                 printk(KERN_INFO
1839                                        "[before new superblock is written]:\n");
1840                                 btrfsic_dump_tree_sub(state, block, 0);
1841                         }
1842                 }
1843                 if (is_metadata) {
1844                         if (!block->is_superblock) {
1845                                 if (num_pages * PAGE_CACHE_SIZE <
1846                                     state->metablock_size) {
1847                                         printk(KERN_INFO
1848                                                "btrfsic: cannot work with too short bios!\n");
1849                                         return;
1850                                 }
1851                                 processed_len = state->metablock_size;
1852                                 bytenr = btrfs_stack_header_bytenr(
1853                                                 (struct btrfs_header *)
1854                                                 mapped_datav[0]);
1855                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1856                                                                dev_state,
1857                                                                dev_bytenr);
1858                         }
1859                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1860                                 if (block->logical_bytenr != bytenr &&
1861                                     !(!block->is_metadata &&
1862                                       block->logical_bytenr == 0))
1863                                         printk(KERN_INFO
1864                                                "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1865                                                bytenr, dev_state->name,
1866                                                dev_bytenr,
1867                                                block->mirror_num,
1868                                                btrfsic_get_block_type(state,
1869                                                                       block),
1870                                                block->logical_bytenr);
1871                                 else
1872                                         printk(KERN_INFO
1873                                                "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1874                                                bytenr, dev_state->name,
1875                                                dev_bytenr, block->mirror_num,
1876                                                btrfsic_get_block_type(state,
1877                                                                       block));
1878                         }
1879                         block->logical_bytenr = bytenr;
1880                 } else {
1881                         if (num_pages * PAGE_CACHE_SIZE <
1882                             state->datablock_size) {
1883                                 printk(KERN_INFO
1884                                        "btrfsic: cannot work with too short bios!\n");
1885                                 return;
1886                         }
1887                         processed_len = state->datablock_size;
1888                         bytenr = block->logical_bytenr;
1889                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1890                                 printk(KERN_INFO
1891                                        "Written block @%llu (%s/%llu/%d)"
1892                                        " found in hash table, %c.\n",
1893                                        bytenr, dev_state->name, dev_bytenr,
1894                                        block->mirror_num,
1895                                        btrfsic_get_block_type(state, block));
1896                 }
1897
1898                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1899                         printk(KERN_INFO
1900                                "ref_to_list: %cE, ref_from_list: %cE\n",
1901                                list_empty(&block->ref_to_list) ? ' ' : '!',
1902                                list_empty(&block->ref_from_list) ? ' ' : '!');
1903                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1904                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1905                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1906                                " objectid=%llu, type=%d, offset=%llu),"
1907                                " new(gen=%llu),"
1908                                " which is referenced by most recent superblock"
1909                                " (superblockgen=%llu)!\n",
1910                                btrfsic_get_block_type(state, block), bytenr,
1911                                dev_state->name, dev_bytenr, block->mirror_num,
1912                                block->generation,
1913                                btrfs_disk_key_objectid(&block->disk_key),
1914                                block->disk_key.type,
1915                                btrfs_disk_key_offset(&block->disk_key),
1916                                btrfs_stack_header_generation(
1917                                        (struct btrfs_header *) mapped_datav[0]),
1918                                state->max_superblock_generation);
1919                         btrfsic_dump_tree(state);
1920                 }
1921
1922                 if (!block->is_iodone && !block->never_written) {
1923                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1924                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1925                                " which is not yet iodone!\n",
1926                                btrfsic_get_block_type(state, block), bytenr,
1927                                dev_state->name, dev_bytenr, block->mirror_num,
1928                                block->generation,
1929                                btrfs_stack_header_generation(
1930                                        (struct btrfs_header *)
1931                                        mapped_datav[0]));
1932                         /* it would not be safe to go on */
1933                         btrfsic_dump_tree(state);
1934                         goto continue_loop;
1935                 }
1936
1937                 /*
1938                  * Clear all references of this block. Do not free
1939                  * the block itself even if it is not referenced anymore
1940                  * because it still carries valuable information
1941                  * like whether it was ever written and IO completed.
1942                  */
1943                 list_for_each_entry_safe(l, tmp, &block->ref_to_list,
1944                                          node_ref_to) {
1945                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1946                                 btrfsic_print_rem_link(state, l);
1947                         l->ref_cnt--;
1948                         if (0 == l->ref_cnt) {
1949                                 list_del(&l->node_ref_to);
1950                                 list_del(&l->node_ref_from);
1951                                 btrfsic_block_link_hashtable_remove(l);
1952                                 btrfsic_block_link_free(l);
1953                         }
1954                 }
1955
1956                 block_ctx.dev = dev_state;
1957                 block_ctx.dev_bytenr = dev_bytenr;
1958                 block_ctx.start = bytenr;
1959                 block_ctx.len = processed_len;
1960                 block_ctx.pagev = NULL;
1961                 block_ctx.mem_to_free = NULL;
1962                 block_ctx.datav = mapped_datav;
1963
1964                 if (is_metadata || state->include_extent_data) {
1965                         block->never_written = 0;
1966                         block->iodone_w_error = 0;
1967                         if (NULL != bio) {
1968                                 block->is_iodone = 0;
1969                                 BUG_ON(NULL == bio_is_patched);
1970                                 if (!*bio_is_patched) {
1971                                         block->orig_bio_bh_private =
1972                                             bio->bi_private;
1973                                         block->orig_bio_bh_end_io.bio =
1974                                             bio->bi_end_io;
1975                                         block->next_in_same_bio = NULL;
1976                                         bio->bi_private = block;
1977                                         bio->bi_end_io = btrfsic_bio_end_io;
1978                                         *bio_is_patched = 1;
1979                                 } else {
1980                                         struct btrfsic_block *chained_block =
1981                                             (struct btrfsic_block *)
1982                                             bio->bi_private;
1983
1984                                         BUG_ON(NULL == chained_block);
1985                                         block->orig_bio_bh_private =
1986                                             chained_block->orig_bio_bh_private;
1987                                         block->orig_bio_bh_end_io.bio =
1988                                             chained_block->orig_bio_bh_end_io.
1989                                             bio;
1990                                         block->next_in_same_bio = chained_block;
1991                                         bio->bi_private = block;
1992                                 }
1993                         } else if (NULL != bh) {
1994                                 block->is_iodone = 0;
1995                                 block->orig_bio_bh_private = bh->b_private;
1996                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
1997                                 block->next_in_same_bio = NULL;
1998                                 bh->b_private = block;
1999                                 bh->b_end_io = btrfsic_bh_end_io;
2000                         } else {
2001                                 block->is_iodone = 1;
2002                                 block->orig_bio_bh_private = NULL;
2003                                 block->orig_bio_bh_end_io.bio = NULL;
2004                                 block->next_in_same_bio = NULL;
2005                         }
2006                 }
2007
2008                 block->flush_gen = dev_state->last_flush_gen + 1;
2009                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2010                 if (is_metadata) {
2011                         block->logical_bytenr = bytenr;
2012                         block->is_metadata = 1;
2013                         if (block->is_superblock) {
2014                                 BUG_ON(PAGE_CACHE_SIZE !=
2015                                        BTRFS_SUPER_INFO_SIZE);
2016                                 ret = btrfsic_process_written_superblock(
2017                                                 state,
2018                                                 block,
2019                                                 (struct btrfs_super_block *)
2020                                                 mapped_datav[0]);
2021                                 if (state->print_mask &
2022                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2023                                         printk(KERN_INFO
2024                                         "[after new superblock is written]:\n");
2025                                         btrfsic_dump_tree_sub(state, block, 0);
2026                                 }
2027                         } else {
2028                                 block->mirror_num = 0;  /* unknown */
2029                                 ret = btrfsic_process_metablock(
2030                                                 state,
2031                                                 block,
2032                                                 &block_ctx,
2033                                                 0, 0);
2034                         }
2035                         if (ret)
2036                                 printk(KERN_INFO
2037                                        "btrfsic: btrfsic_process_metablock"
2038                                        "(root @%llu) failed!\n",
2039                                        dev_bytenr);
2040                 } else {
2041                         block->is_metadata = 0;
2042                         block->mirror_num = 0;  /* unknown */
2043                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2044                         if (!state->include_extent_data
2045                             && list_empty(&block->ref_from_list)) {
2046                                 /*
2047                                  * disk block is overwritten with extent
2048                                  * data (not meta data) and we are configured
2049                                  * to not include extent data: take the
2050                                  * chance and free the block's memory
2051                                  */
2052                                 btrfsic_block_hashtable_remove(block);
2053                                 list_del(&block->all_blocks_node);
2054                                 btrfsic_block_free(block);
2055                         }
2056                 }
2057                 btrfsic_release_block_ctx(&block_ctx);
2058         } else {
2059                 /* block has not been found in hash table */
2060                 u64 bytenr;
2061
2062                 if (!is_metadata) {
2063                         processed_len = state->datablock_size;
2064                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2065                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2066                                        " !found in hash table, D.\n",
2067                                        dev_state->name, dev_bytenr);
2068                         if (!state->include_extent_data) {
2069                                 /* ignore that written D block */
2070                                 goto continue_loop;
2071                         }
2072
2073                         /* this is getting ugly for the
2074                          * include_extent_data case... */
2075                         bytenr = 0;     /* unknown */
2076                 } else {
2077                         processed_len = state->metablock_size;
2078                         bytenr = btrfs_stack_header_bytenr(
2079                                         (struct btrfs_header *)
2080                                         mapped_datav[0]);
2081                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2082                                                        dev_bytenr);
2083                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2084                                 printk(KERN_INFO
2085                                        "Written block @%llu (%s/%llu/?)"
2086                                        " !found in hash table, M.\n",
2087                                        bytenr, dev_state->name, dev_bytenr);
2088                 }
2089
2090                 block_ctx.dev = dev_state;
2091                 block_ctx.dev_bytenr = dev_bytenr;
2092                 block_ctx.start = bytenr;
2093                 block_ctx.len = processed_len;
2094                 block_ctx.pagev = NULL;
2095                 block_ctx.mem_to_free = NULL;
2096                 block_ctx.datav = mapped_datav;
2097
2098                 block = btrfsic_block_alloc();
2099                 if (NULL == block) {
2100                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2101                         btrfsic_release_block_ctx(&block_ctx);
2102                         goto continue_loop;
2103                 }
2104                 block->dev_state = dev_state;
2105                 block->dev_bytenr = dev_bytenr;
2106                 block->logical_bytenr = bytenr;
2107                 block->is_metadata = is_metadata;
2108                 block->never_written = 0;
2109                 block->iodone_w_error = 0;
2110                 block->mirror_num = 0;  /* unknown */
2111                 block->flush_gen = dev_state->last_flush_gen + 1;
2112                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2113                 if (NULL != bio) {
2114                         block->is_iodone = 0;
2115                         BUG_ON(NULL == bio_is_patched);
2116                         if (!*bio_is_patched) {
2117                                 block->orig_bio_bh_private = bio->bi_private;
2118                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2119                                 block->next_in_same_bio = NULL;
2120                                 bio->bi_private = block;
2121                                 bio->bi_end_io = btrfsic_bio_end_io;
2122                                 *bio_is_patched = 1;
2123                         } else {
2124                                 struct btrfsic_block *chained_block =
2125                                     (struct btrfsic_block *)
2126                                     bio->bi_private;
2127
2128                                 BUG_ON(NULL == chained_block);
2129                                 block->orig_bio_bh_private =
2130                                     chained_block->orig_bio_bh_private;
2131                                 block->orig_bio_bh_end_io.bio =
2132                                     chained_block->orig_bio_bh_end_io.bio;
2133                                 block->next_in_same_bio = chained_block;
2134                                 bio->bi_private = block;
2135                         }
2136                 } else if (NULL != bh) {
2137                         block->is_iodone = 0;
2138                         block->orig_bio_bh_private = bh->b_private;
2139                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2140                         block->next_in_same_bio = NULL;
2141                         bh->b_private = block;
2142                         bh->b_end_io = btrfsic_bh_end_io;
2143                 } else {
2144                         block->is_iodone = 1;
2145                         block->orig_bio_bh_private = NULL;
2146                         block->orig_bio_bh_end_io.bio = NULL;
2147                         block->next_in_same_bio = NULL;
2148                 }
2149                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2150                         printk(KERN_INFO
2151                                "New written %c-block @%llu (%s/%llu/%d)\n",
2152                                is_metadata ? 'M' : 'D',
2153                                block->logical_bytenr, block->dev_state->name,
2154                                block->dev_bytenr, block->mirror_num);
2155                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2156                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2157
2158                 if (is_metadata) {
2159                         ret = btrfsic_process_metablock(state, block,
2160                                                         &block_ctx, 0, 0);
2161                         if (ret)
2162                                 printk(KERN_INFO
2163                                        "btrfsic: process_metablock(root @%llu)"
2164                                        " failed!\n",
2165                                        dev_bytenr);
2166                 }
2167                 btrfsic_release_block_ctx(&block_ctx);
2168         }
2169
2170 continue_loop:
2171         BUG_ON(!processed_len);
2172         dev_bytenr += processed_len;
2173         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2174         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2175         goto again;
2176 }
2177
2178 static void btrfsic_bio_end_io(struct bio *bp)
2179 {
2180         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2181         int iodone_w_error;
2182
2183         /* mutex is not held! This is not save if IO is not yet completed
2184          * on umount */
2185         iodone_w_error = 0;
2186         if (bp->bi_error)
2187                 iodone_w_error = 1;
2188
2189         BUG_ON(NULL == block);
2190         bp->bi_private = block->orig_bio_bh_private;
2191         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2192
2193         do {
2194                 struct btrfsic_block *next_block;
2195                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2196
2197                 if ((dev_state->state->print_mask &
2198                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2199                         printk(KERN_INFO
2200                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2201                                bp->bi_error,
2202                                btrfsic_get_block_type(dev_state->state, block),
2203                                block->logical_bytenr, dev_state->name,
2204                                block->dev_bytenr, block->mirror_num);
2205                 next_block = block->next_in_same_bio;
2206                 block->iodone_w_error = iodone_w_error;
2207                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2208                         dev_state->last_flush_gen++;
2209                         if ((dev_state->state->print_mask &
2210                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2211                                 printk(KERN_INFO
2212                                        "bio_end_io() new %s flush_gen=%llu\n",
2213                                        dev_state->name,
2214                                        dev_state->last_flush_gen);
2215                 }
2216                 if (block->submit_bio_bh_rw & REQ_FUA)
2217                         block->flush_gen = 0; /* FUA completed means block is
2218                                                * on disk */
2219                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2220                 block = next_block;
2221         } while (NULL != block);
2222
2223         bp->bi_end_io(bp);
2224 }
2225
2226 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2227 {
2228         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2229         int iodone_w_error = !uptodate;
2230         struct btrfsic_dev_state *dev_state;
2231
2232         BUG_ON(NULL == block);
2233         dev_state = block->dev_state;
2234         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2235                 printk(KERN_INFO
2236                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2237                        iodone_w_error,
2238                        btrfsic_get_block_type(dev_state->state, block),
2239                        block->logical_bytenr, block->dev_state->name,
2240                        block->dev_bytenr, block->mirror_num);
2241
2242         block->iodone_w_error = iodone_w_error;
2243         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2244                 dev_state->last_flush_gen++;
2245                 if ((dev_state->state->print_mask &
2246                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2247                         printk(KERN_INFO
2248                                "bh_end_io() new %s flush_gen=%llu\n",
2249                                dev_state->name, dev_state->last_flush_gen);
2250         }
2251         if (block->submit_bio_bh_rw & REQ_FUA)
2252                 block->flush_gen = 0; /* FUA completed means block is on disk */
2253
2254         bh->b_private = block->orig_bio_bh_private;
2255         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2256         block->is_iodone = 1; /* for FLUSH, this releases the block */
2257         bh->b_end_io(bh, uptodate);
2258 }
2259
2260 static int btrfsic_process_written_superblock(
2261                 struct btrfsic_state *state,
2262                 struct btrfsic_block *const superblock,
2263                 struct btrfs_super_block *const super_hdr)
2264 {
2265         int pass;
2266
2267         superblock->generation = btrfs_super_generation(super_hdr);
2268         if (!(superblock->generation > state->max_superblock_generation ||
2269               0 == state->max_superblock_generation)) {
2270                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2271                         printk(KERN_INFO
2272                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2273                                " with old gen %llu <= %llu\n",
2274                                superblock->logical_bytenr,
2275                                superblock->dev_state->name,
2276                                superblock->dev_bytenr, superblock->mirror_num,
2277                                btrfs_super_generation(super_hdr),
2278                                state->max_superblock_generation);
2279         } else {
2280                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2281                         printk(KERN_INFO
2282                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2283                                " with new gen %llu > %llu\n",
2284                                superblock->logical_bytenr,
2285                                superblock->dev_state->name,
2286                                superblock->dev_bytenr, superblock->mirror_num,
2287                                btrfs_super_generation(super_hdr),
2288                                state->max_superblock_generation);
2289
2290                 state->max_superblock_generation =
2291                     btrfs_super_generation(super_hdr);
2292                 state->latest_superblock = superblock;
2293         }
2294
2295         for (pass = 0; pass < 3; pass++) {
2296                 int ret;
2297                 u64 next_bytenr;
2298                 struct btrfsic_block *next_block;
2299                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2300                 struct btrfsic_block_link *l;
2301                 int num_copies;
2302                 int mirror_num;
2303                 const char *additional_string = NULL;
2304                 struct btrfs_disk_key tmp_disk_key = {0};
2305
2306                 btrfs_set_disk_key_objectid(&tmp_disk_key,
2307                                             BTRFS_ROOT_ITEM_KEY);
2308                 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2309
2310                 switch (pass) {
2311                 case 0:
2312                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2313                                                     BTRFS_ROOT_TREE_OBJECTID);
2314                         additional_string = "root ";
2315                         next_bytenr = btrfs_super_root(super_hdr);
2316                         if (state->print_mask &
2317                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2318                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
2319                         break;
2320                 case 1:
2321                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2322                                                     BTRFS_CHUNK_TREE_OBJECTID);
2323                         additional_string = "chunk ";
2324                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2325                         if (state->print_mask &
2326                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2327                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2328                         break;
2329                 case 2:
2330                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2331                                                     BTRFS_TREE_LOG_OBJECTID);
2332                         additional_string = "log ";
2333                         next_bytenr = btrfs_super_log_root(super_hdr);
2334                         if (0 == next_bytenr)
2335                                 continue;
2336                         if (state->print_mask &
2337                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2338                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
2339                         break;
2340                 }
2341
2342                 num_copies =
2343                     btrfs_num_copies(state->root->fs_info,
2344                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2345                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2346                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2347                                next_bytenr, num_copies);
2348                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2349                         int was_created;
2350
2351                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2352                                 printk(KERN_INFO
2353                                        "btrfsic_process_written_superblock("
2354                                        "mirror_num=%d)\n", mirror_num);
2355                         ret = btrfsic_map_block(state, next_bytenr,
2356                                                 BTRFS_SUPER_INFO_SIZE,
2357                                                 &tmp_next_block_ctx,
2358                                                 mirror_num);
2359                         if (ret) {
2360                                 printk(KERN_INFO
2361                                        "btrfsic: btrfsic_map_block(@%llu,"
2362                                        " mirror=%d) failed!\n",
2363                                        next_bytenr, mirror_num);
2364                                 return -1;
2365                         }
2366
2367                         next_block = btrfsic_block_lookup_or_add(
2368                                         state,
2369                                         &tmp_next_block_ctx,
2370                                         additional_string,
2371                                         1, 0, 1,
2372                                         mirror_num,
2373                                         &was_created);
2374                         if (NULL == next_block) {
2375                                 printk(KERN_INFO
2376                                        "btrfsic: error, kmalloc failed!\n");
2377                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2378                                 return -1;
2379                         }
2380
2381                         next_block->disk_key = tmp_disk_key;
2382                         if (was_created)
2383                                 next_block->generation =
2384                                     BTRFSIC_GENERATION_UNKNOWN;
2385                         l = btrfsic_block_link_lookup_or_add(
2386                                         state,
2387                                         &tmp_next_block_ctx,
2388                                         next_block,
2389                                         superblock,
2390                                         BTRFSIC_GENERATION_UNKNOWN);
2391                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2392                         if (NULL == l)
2393                                 return -1;
2394                 }
2395         }
2396
2397         if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2398                 btrfsic_dump_tree(state);
2399
2400         return 0;
2401 }
2402
2403 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2404                                         struct btrfsic_block *const block,
2405                                         int recursion_level)
2406 {
2407         const struct btrfsic_block_link *l;
2408         int ret = 0;
2409
2410         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2411                 /*
2412                  * Note that this situation can happen and does not
2413                  * indicate an error in regular cases. It happens
2414                  * when disk blocks are freed and later reused.
2415                  * The check-integrity module is not aware of any
2416                  * block free operations, it just recognizes block
2417                  * write operations. Therefore it keeps the linkage
2418                  * information for a block until a block is
2419                  * rewritten. This can temporarily cause incorrect
2420                  * and even circular linkage informations. This
2421                  * causes no harm unless such blocks are referenced
2422                  * by the most recent super block.
2423                  */
2424                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2425                         printk(KERN_INFO
2426                                "btrfsic: abort cyclic linkage (case 1).\n");
2427
2428                 return ret;
2429         }
2430
2431         /*
2432          * This algorithm is recursive because the amount of used stack
2433          * space is very small and the max recursion depth is limited.
2434          */
2435         list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2436                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2437                         printk(KERN_INFO
2438                                "rl=%d, %c @%llu (%s/%llu/%d)"
2439                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2440                                recursion_level,
2441                                btrfsic_get_block_type(state, block),
2442                                block->logical_bytenr, block->dev_state->name,
2443                                block->dev_bytenr, block->mirror_num,
2444                                l->ref_cnt,
2445                                btrfsic_get_block_type(state, l->block_ref_to),
2446                                l->block_ref_to->logical_bytenr,
2447                                l->block_ref_to->dev_state->name,
2448                                l->block_ref_to->dev_bytenr,
2449                                l->block_ref_to->mirror_num);
2450                 if (l->block_ref_to->never_written) {
2451                         printk(KERN_INFO "btrfs: attempt to write superblock"
2452                                " which references block %c @%llu (%s/%llu/%d)"
2453                                " which is never written!\n",
2454                                btrfsic_get_block_type(state, l->block_ref_to),
2455                                l->block_ref_to->logical_bytenr,
2456                                l->block_ref_to->dev_state->name,
2457                                l->block_ref_to->dev_bytenr,
2458                                l->block_ref_to->mirror_num);
2459                         ret = -1;
2460                 } else if (!l->block_ref_to->is_iodone) {
2461                         printk(KERN_INFO "btrfs: attempt to write superblock"
2462                                " which references block %c @%llu (%s/%llu/%d)"
2463                                " which is not yet iodone!\n",
2464                                btrfsic_get_block_type(state, l->block_ref_to),
2465                                l->block_ref_to->logical_bytenr,
2466                                l->block_ref_to->dev_state->name,
2467                                l->block_ref_to->dev_bytenr,
2468                                l->block_ref_to->mirror_num);
2469                         ret = -1;
2470                 } else if (l->block_ref_to->iodone_w_error) {
2471                         printk(KERN_INFO "btrfs: attempt to write superblock"
2472                                " which references block %c @%llu (%s/%llu/%d)"
2473                                " which has write error!\n",
2474                                btrfsic_get_block_type(state, l->block_ref_to),
2475                                l->block_ref_to->logical_bytenr,
2476                                l->block_ref_to->dev_state->name,
2477                                l->block_ref_to->dev_bytenr,
2478                                l->block_ref_to->mirror_num);
2479                         ret = -1;
2480                 } else if (l->parent_generation !=
2481                            l->block_ref_to->generation &&
2482                            BTRFSIC_GENERATION_UNKNOWN !=
2483                            l->parent_generation &&
2484                            BTRFSIC_GENERATION_UNKNOWN !=
2485                            l->block_ref_to->generation) {
2486                         printk(KERN_INFO "btrfs: attempt to write superblock"
2487                                " which references block %c @%llu (%s/%llu/%d)"
2488                                " with generation %llu !="
2489                                " parent generation %llu!\n",
2490                                btrfsic_get_block_type(state, l->block_ref_to),
2491                                l->block_ref_to->logical_bytenr,
2492                                l->block_ref_to->dev_state->name,
2493                                l->block_ref_to->dev_bytenr,
2494                                l->block_ref_to->mirror_num,
2495                                l->block_ref_to->generation,
2496                                l->parent_generation);
2497                         ret = -1;
2498                 } else if (l->block_ref_to->flush_gen >
2499                            l->block_ref_to->dev_state->last_flush_gen) {
2500                         printk(KERN_INFO "btrfs: attempt to write superblock"
2501                                " which references block %c @%llu (%s/%llu/%d)"
2502                                " which is not flushed out of disk's write cache"
2503                                " (block flush_gen=%llu,"
2504                                " dev->flush_gen=%llu)!\n",
2505                                btrfsic_get_block_type(state, l->block_ref_to),
2506                                l->block_ref_to->logical_bytenr,
2507                                l->block_ref_to->dev_state->name,
2508                                l->block_ref_to->dev_bytenr,
2509                                l->block_ref_to->mirror_num, block->flush_gen,
2510                                l->block_ref_to->dev_state->last_flush_gen);
2511                         ret = -1;
2512                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2513                                                               l->block_ref_to,
2514                                                               recursion_level +
2515                                                               1)) {
2516                         ret = -1;
2517                 }
2518         }
2519
2520         return ret;
2521 }
2522
2523 static int btrfsic_is_block_ref_by_superblock(
2524                 const struct btrfsic_state *state,
2525                 const struct btrfsic_block *block,
2526                 int recursion_level)
2527 {
2528         const struct btrfsic_block_link *l;
2529
2530         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2531                 /* refer to comment at "abort cyclic linkage (case 1)" */
2532                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2533                         printk(KERN_INFO
2534                                "btrfsic: abort cyclic linkage (case 2).\n");
2535
2536                 return 0;
2537         }
2538
2539         /*
2540          * This algorithm is recursive because the amount of used stack space
2541          * is very small and the max recursion depth is limited.
2542          */
2543         list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
2544                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2545                         printk(KERN_INFO
2546                                "rl=%d, %c @%llu (%s/%llu/%d)"
2547                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2548                                recursion_level,
2549                                btrfsic_get_block_type(state, block),
2550                                block->logical_bytenr, block->dev_state->name,
2551                                block->dev_bytenr, block->mirror_num,
2552                                l->ref_cnt,
2553                                btrfsic_get_block_type(state, l->block_ref_from),
2554                                l->block_ref_from->logical_bytenr,
2555                                l->block_ref_from->dev_state->name,
2556                                l->block_ref_from->dev_bytenr,
2557                                l->block_ref_from->mirror_num);
2558                 if (l->block_ref_from->is_superblock &&
2559                     state->latest_superblock->dev_bytenr ==
2560                     l->block_ref_from->dev_bytenr &&
2561                     state->latest_superblock->dev_state->bdev ==
2562                     l->block_ref_from->dev_state->bdev)
2563                         return 1;
2564                 else if (btrfsic_is_block_ref_by_superblock(state,
2565                                                             l->block_ref_from,
2566                                                             recursion_level +
2567                                                             1))
2568                         return 1;
2569         }
2570
2571         return 0;
2572 }
2573
2574 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2575                                    const struct btrfsic_block_link *l)
2576 {
2577         printk(KERN_INFO
2578                "Add %u* link from %c @%llu (%s/%llu/%d)"
2579                " to %c @%llu (%s/%llu/%d).\n",
2580                l->ref_cnt,
2581                btrfsic_get_block_type(state, l->block_ref_from),
2582                l->block_ref_from->logical_bytenr,
2583                l->block_ref_from->dev_state->name,
2584                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2585                btrfsic_get_block_type(state, l->block_ref_to),
2586                l->block_ref_to->logical_bytenr,
2587                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2588                l->block_ref_to->mirror_num);
2589 }
2590
2591 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2592                                    const struct btrfsic_block_link *l)
2593 {
2594         printk(KERN_INFO
2595                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2596                " to %c @%llu (%s/%llu/%d).\n",
2597                l->ref_cnt,
2598                btrfsic_get_block_type(state, l->block_ref_from),
2599                l->block_ref_from->logical_bytenr,
2600                l->block_ref_from->dev_state->name,
2601                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2602                btrfsic_get_block_type(state, l->block_ref_to),
2603                l->block_ref_to->logical_bytenr,
2604                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2605                l->block_ref_to->mirror_num);
2606 }
2607
2608 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2609                                    const struct btrfsic_block *block)
2610 {
2611         if (block->is_superblock &&
2612             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2613             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2614                 return 'S';
2615         else if (block->is_superblock)
2616                 return 's';
2617         else if (block->is_metadata)
2618                 return 'M';
2619         else
2620                 return 'D';
2621 }
2622
2623 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2624 {
2625         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2626 }
2627
2628 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2629                                   const struct btrfsic_block *block,
2630                                   int indent_level)
2631 {
2632         const struct btrfsic_block_link *l;
2633         int indent_add;
2634         static char buf[80];
2635         int cursor_position;
2636
2637         /*
2638          * Should better fill an on-stack buffer with a complete line and
2639          * dump it at once when it is time to print a newline character.
2640          */
2641
2642         /*
2643          * This algorithm is recursive because the amount of used stack space
2644          * is very small and the max recursion depth is limited.
2645          */
2646         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2647                              btrfsic_get_block_type(state, block),
2648                              block->logical_bytenr, block->dev_state->name,
2649                              block->dev_bytenr, block->mirror_num);
2650         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2651                 printk("[...]\n");
2652                 return;
2653         }
2654         printk(buf);
2655         indent_level += indent_add;
2656         if (list_empty(&block->ref_to_list)) {
2657                 printk("\n");
2658                 return;
2659         }
2660         if (block->mirror_num > 1 &&
2661             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2662                 printk(" [...]\n");
2663                 return;
2664         }
2665
2666         cursor_position = indent_level;
2667         list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2668                 while (cursor_position < indent_level) {
2669                         printk(" ");
2670                         cursor_position++;
2671                 }
2672                 if (l->ref_cnt > 1)
2673                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2674                 else
2675                         indent_add = sprintf(buf, " --> ");
2676                 if (indent_level + indent_add >
2677                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2678                         printk("[...]\n");
2679                         cursor_position = 0;
2680                         continue;
2681                 }
2682
2683                 printk(buf);
2684
2685                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2686                                       indent_level + indent_add);
2687                 cursor_position = 0;
2688         }
2689 }
2690
2691 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2692                 struct btrfsic_state *state,
2693                 struct btrfsic_block_data_ctx *next_block_ctx,
2694                 struct btrfsic_block *next_block,
2695                 struct btrfsic_block *from_block,
2696                 u64 parent_generation)
2697 {
2698         struct btrfsic_block_link *l;
2699
2700         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2701                                                 next_block_ctx->dev_bytenr,
2702                                                 from_block->dev_state->bdev,
2703                                                 from_block->dev_bytenr,
2704                                                 &state->block_link_hashtable);
2705         if (NULL == l) {
2706                 l = btrfsic_block_link_alloc();
2707                 if (NULL == l) {
2708                         printk(KERN_INFO
2709                                "btrfsic: error, kmalloc" " failed!\n");
2710                         return NULL;
2711                 }
2712
2713                 l->block_ref_to = next_block;
2714                 l->block_ref_from = from_block;
2715                 l->ref_cnt = 1;
2716                 l->parent_generation = parent_generation;
2717
2718                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2719                         btrfsic_print_add_link(state, l);
2720
2721                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2722                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2723
2724                 btrfsic_block_link_hashtable_add(l,
2725                                                  &state->block_link_hashtable);
2726         } else {
2727                 l->ref_cnt++;
2728                 l->parent_generation = parent_generation;
2729                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2730                         btrfsic_print_add_link(state, l);
2731         }
2732
2733         return l;
2734 }
2735
2736 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2737                 struct btrfsic_state *state,
2738                 struct btrfsic_block_data_ctx *block_ctx,
2739                 const char *additional_string,
2740                 int is_metadata,
2741                 int is_iodone,
2742                 int never_written,
2743                 int mirror_num,
2744                 int *was_created)
2745 {
2746         struct btrfsic_block *block;
2747
2748         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2749                                                block_ctx->dev_bytenr,
2750                                                &state->block_hashtable);
2751         if (NULL == block) {
2752                 struct btrfsic_dev_state *dev_state;
2753
2754                 block = btrfsic_block_alloc();
2755                 if (NULL == block) {
2756                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2757                         return NULL;
2758                 }
2759                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2760                 if (NULL == dev_state) {
2761                         printk(KERN_INFO
2762                                "btrfsic: error, lookup dev_state failed!\n");
2763                         btrfsic_block_free(block);
2764                         return NULL;
2765                 }
2766                 block->dev_state = dev_state;
2767                 block->dev_bytenr = block_ctx->dev_bytenr;
2768                 block->logical_bytenr = block_ctx->start;
2769                 block->is_metadata = is_metadata;
2770                 block->is_iodone = is_iodone;
2771                 block->never_written = never_written;
2772                 block->mirror_num = mirror_num;
2773                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2774                         printk(KERN_INFO
2775                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2776                                additional_string,
2777                                btrfsic_get_block_type(state, block),
2778                                block->logical_bytenr, dev_state->name,
2779                                block->dev_bytenr, mirror_num);
2780                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2781                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2782                 if (NULL != was_created)
2783                         *was_created = 1;
2784         } else {
2785                 if (NULL != was_created)
2786                         *was_created = 0;
2787         }
2788
2789         return block;
2790 }
2791
2792 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2793                                            u64 bytenr,
2794                                            struct btrfsic_dev_state *dev_state,
2795                                            u64 dev_bytenr)
2796 {
2797         int num_copies;
2798         int mirror_num;
2799         int ret;
2800         struct btrfsic_block_data_ctx block_ctx;
2801         int match = 0;
2802
2803         num_copies = btrfs_num_copies(state->root->fs_info,
2804                                       bytenr, state->metablock_size);
2805
2806         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2807                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2808                                         &block_ctx, mirror_num);
2809                 if (ret) {
2810                         printk(KERN_INFO "btrfsic:"
2811                                " btrfsic_map_block(logical @%llu,"
2812                                " mirror %d) failed!\n",
2813                                bytenr, mirror_num);
2814                         continue;
2815                 }
2816
2817                 if (dev_state->bdev == block_ctx.dev->bdev &&
2818                     dev_bytenr == block_ctx.dev_bytenr) {
2819                         match++;
2820                         btrfsic_release_block_ctx(&block_ctx);
2821                         break;
2822                 }
2823                 btrfsic_release_block_ctx(&block_ctx);
2824         }
2825
2826         if (WARN_ON(!match)) {
2827                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2828                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2829                        " phys_bytenr=%llu)!\n",
2830                        bytenr, dev_state->name, dev_bytenr);
2831                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2832                         ret = btrfsic_map_block(state, bytenr,
2833                                                 state->metablock_size,
2834                                                 &block_ctx, mirror_num);
2835                         if (ret)
2836                                 continue;
2837
2838                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
2839                                " (%s/%llu/%d)\n",
2840                                bytenr, block_ctx.dev->name,
2841                                block_ctx.dev_bytenr, mirror_num);
2842                 }
2843         }
2844 }
2845
2846 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2847                 struct block_device *bdev)
2848 {
2849         struct btrfsic_dev_state *ds;
2850
2851         ds = btrfsic_dev_state_hashtable_lookup(bdev,
2852                                                 &btrfsic_dev_state_hashtable);
2853         return ds;
2854 }
2855
2856 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
2857 {
2858         struct btrfsic_dev_state *dev_state;
2859
2860         if (!btrfsic_is_initialized)
2861                 return submit_bh(rw, bh);
2862
2863         mutex_lock(&btrfsic_mutex);
2864         /* since btrfsic_submit_bh() might also be called before
2865          * btrfsic_mount(), this might return NULL */
2866         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
2867
2868         /* Only called to write the superblock (incl. FLUSH/FUA) */
2869         if (NULL != dev_state &&
2870             (rw & WRITE) && bh->b_size > 0) {
2871                 u64 dev_bytenr;
2872
2873                 dev_bytenr = 4096 * bh->b_blocknr;
2874                 if (dev_state->state->print_mask &
2875                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2876                         printk(KERN_INFO
2877                                "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
2878                                " size=%zu, data=%p, bdev=%p)\n",
2879                                rw, (unsigned long long)bh->b_blocknr,
2880                                dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
2881                 btrfsic_process_written_block(dev_state, dev_bytenr,
2882                                               &bh->b_data, 1, NULL,
2883                                               NULL, bh, rw);
2884         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2885                 if (dev_state->state->print_mask &
2886                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2887                         printk(KERN_INFO
2888                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
2889                                rw, bh->b_bdev);
2890                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2891                         if ((dev_state->state->print_mask &
2892                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2893                               BTRFSIC_PRINT_MASK_VERBOSE)))
2894                                 printk(KERN_INFO
2895                                        "btrfsic_submit_bh(%s) with FLUSH"
2896                                        " but dummy block already in use"
2897                                        " (ignored)!\n",
2898                                        dev_state->name);
2899                 } else {
2900                         struct btrfsic_block *const block =
2901                                 &dev_state->dummy_block_for_bio_bh_flush;
2902
2903                         block->is_iodone = 0;
2904                         block->never_written = 0;
2905                         block->iodone_w_error = 0;
2906                         block->flush_gen = dev_state->last_flush_gen + 1;
2907                         block->submit_bio_bh_rw = rw;
2908                         block->orig_bio_bh_private = bh->b_private;
2909                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2910                         block->next_in_same_bio = NULL;
2911                         bh->b_private = block;
2912                         bh->b_end_io = btrfsic_bh_end_io;
2913                 }
2914         }
2915         mutex_unlock(&btrfsic_mutex);
2916         return submit_bh(rw, bh);
2917 }
2918
2919 static void __btrfsic_submit_bio(int rw, struct bio *bio)
2920 {
2921         struct btrfsic_dev_state *dev_state;
2922
2923         if (!btrfsic_is_initialized)
2924                 return;
2925
2926         mutex_lock(&btrfsic_mutex);
2927         /* since btrfsic_submit_bio() is also called before
2928          * btrfsic_mount(), this might return NULL */
2929         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
2930         if (NULL != dev_state &&
2931             (rw & WRITE) && NULL != bio->bi_io_vec) {
2932                 unsigned int i;
2933                 u64 dev_bytenr;
2934                 u64 cur_bytenr;
2935                 int bio_is_patched;
2936                 char **mapped_datav;
2937
2938                 dev_bytenr = 512 * bio->bi_iter.bi_sector;
2939                 bio_is_patched = 0;
2940                 if (dev_state->state->print_mask &
2941                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2942                         printk(KERN_INFO
2943                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
2944                                " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
2945                                rw, bio->bi_vcnt,
2946                                (unsigned long long)bio->bi_iter.bi_sector,
2947                                dev_bytenr, bio->bi_bdev);
2948
2949                 mapped_datav = kmalloc_array(bio->bi_vcnt,
2950                                              sizeof(*mapped_datav), GFP_NOFS);
2951                 if (!mapped_datav)
2952                         goto leave;
2953                 cur_bytenr = dev_bytenr;
2954                 for (i = 0; i < bio->bi_vcnt; i++) {
2955                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
2956                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
2957                         if (!mapped_datav[i]) {
2958                                 while (i > 0) {
2959                                         i--;
2960                                         kunmap(bio->bi_io_vec[i].bv_page);
2961                                 }
2962                                 kfree(mapped_datav);
2963                                 goto leave;
2964                         }
2965                         if (dev_state->state->print_mask &
2966                             BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
2967                                 printk(KERN_INFO
2968                                        "#%u: bytenr=%llu, len=%u, offset=%u\n",
2969                                        i, cur_bytenr, bio->bi_io_vec[i].bv_len,
2970                                        bio->bi_io_vec[i].bv_offset);
2971                         cur_bytenr += bio->bi_io_vec[i].bv_len;
2972                 }
2973                 btrfsic_process_written_block(dev_state, dev_bytenr,
2974                                               mapped_datav, bio->bi_vcnt,
2975                                               bio, &bio_is_patched,
2976                                               NULL, rw);
2977                 while (i > 0) {
2978                         i--;
2979                         kunmap(bio->bi_io_vec[i].bv_page);
2980                 }
2981                 kfree(mapped_datav);
2982         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2983                 if (dev_state->state->print_mask &
2984                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2985                         printk(KERN_INFO
2986                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
2987                                rw, bio->bi_bdev);
2988                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2989                         if ((dev_state->state->print_mask &
2990                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2991                               BTRFSIC_PRINT_MASK_VERBOSE)))
2992                                 printk(KERN_INFO
2993                                        "btrfsic_submit_bio(%s) with FLUSH"
2994                                        " but dummy block already in use"
2995                                        " (ignored)!\n",
2996                                        dev_state->name);
2997                 } else {
2998                         struct btrfsic_block *const block =
2999                                 &dev_state->dummy_block_for_bio_bh_flush;
3000
3001                         block->is_iodone = 0;
3002                         block->never_written = 0;
3003                         block->iodone_w_error = 0;
3004                         block->flush_gen = dev_state->last_flush_gen + 1;
3005                         block->submit_bio_bh_rw = rw;
3006                         block->orig_bio_bh_private = bio->bi_private;
3007                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3008                         block->next_in_same_bio = NULL;
3009                         bio->bi_private = block;
3010                         bio->bi_end_io = btrfsic_bio_end_io;
3011                 }
3012         }
3013 leave:
3014         mutex_unlock(&btrfsic_mutex);
3015 }
3016
/*
 * Integrity-checking wrapper around submit_bio(): run the checker's
 * bookkeeping on @bio first, then submit it unchanged.
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	/* Bookkeeping only; a no-op while the checker is not initialized. */
	__btrfsic_submit_bio(rw, bio);

	submit_bio(rw, bio);
}
3022
/*
 * Integrity-checking wrapper around submit_bio_wait(): run the checker's
 * bookkeeping on @bio first, then submit it and return the result of
 * submit_bio_wait().
 */
int btrfsic_submit_bio_wait(int rw, struct bio *bio)
{
	int result;

	/* Bookkeeping only; a no-op while the checker is not initialized. */
	__btrfsic_submit_bio(rw, bio);

	result = submit_bio_wait(rw, bio);
	return result;
}
3028
3029 int btrfsic_mount(struct btrfs_root *root,
3030                   struct btrfs_fs_devices *fs_devices,
3031                   int including_extent_data, u32 print_mask)
3032 {
3033         int ret;
3034         struct btrfsic_state *state;
3035         struct list_head *dev_head = &fs_devices->devices;
3036         struct btrfs_device *device;
3037
3038         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3039                 printk(KERN_INFO
3040                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3041                        root->nodesize, PAGE_CACHE_SIZE);
3042                 return -1;
3043         }
3044         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3045                 printk(KERN_INFO
3046                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3047                        root->sectorsize, PAGE_CACHE_SIZE);
3048                 return -1;
3049         }
3050         state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
3051         if (!state) {
3052                 state = vzalloc(sizeof(*state));
3053                 if (!state) {
3054                         printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
3055                         return -1;
3056                 }
3057         }
3058
3059         if (!btrfsic_is_initialized) {
3060                 mutex_init(&btrfsic_mutex);
3061                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3062                 btrfsic_is_initialized = 1;
3063         }
3064         mutex_lock(&btrfsic_mutex);
3065         state->root = root;
3066         state->print_mask = print_mask;
3067         state->include_extent_data = including_extent_data;
3068         state->csum_size = 0;
3069         state->metablock_size = root->nodesize;
3070         state->datablock_size = root->sectorsize;
3071         INIT_LIST_HEAD(&state->all_blocks_list);
3072         btrfsic_block_hashtable_init(&state->block_hashtable);
3073         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3074         state->max_superblock_generation = 0;
3075         state->latest_superblock = NULL;
3076
3077         list_for_each_entry(device, dev_head, dev_list) {
3078                 struct btrfsic_dev_state *ds;
3079                 char *p;
3080
3081                 if (!device->bdev || !device->name)
3082                         continue;
3083
3084                 ds = btrfsic_dev_state_alloc();
3085                 if (NULL == ds) {
3086                         printk(KERN_INFO
3087                                "btrfs check-integrity: kmalloc() failed!\n");
3088                         mutex_unlock(&btrfsic_mutex);
3089                         return -1;
3090                 }
3091                 ds->bdev = device->bdev;
3092                 ds->state = state;
3093                 bdevname(ds->bdev, ds->name);
3094                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3095                 for (p = ds->name; *p != '\0'; p++);
3096                 while (p > ds->name && *p != '/')
3097                         p--;
3098                 if (*p == '/')
3099                         p++;
3100                 strlcpy(ds->name, p, sizeof(ds->name));
3101                 btrfsic_dev_state_hashtable_add(ds,
3102                                                 &btrfsic_dev_state_hashtable);
3103         }
3104
3105         ret = btrfsic_process_superblock(state, fs_devices);
3106         if (0 != ret) {
3107                 mutex_unlock(&btrfsic_mutex);
3108                 btrfsic_unmount(root, fs_devices);
3109                 return ret;
3110         }
3111
3112         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3113                 btrfsic_dump_database(state);
3114         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3115                 btrfsic_dump_tree(state);
3116
3117         mutex_unlock(&btrfsic_mutex);
3118         return 0;
3119 }
3120
3121 void btrfsic_unmount(struct btrfs_root *root,
3122                      struct btrfs_fs_devices *fs_devices)
3123 {
3124         struct btrfsic_block *b_all, *tmp_all;
3125         struct btrfsic_state *state;
3126         struct list_head *dev_head = &fs_devices->devices;
3127         struct btrfs_device *device;
3128
3129         if (!btrfsic_is_initialized)
3130                 return;
3131
3132         mutex_lock(&btrfsic_mutex);
3133
3134         state = NULL;
3135         list_for_each_entry(device, dev_head, dev_list) {
3136                 struct btrfsic_dev_state *ds;
3137
3138                 if (!device->bdev || !device->name)
3139                         continue;
3140
3141                 ds = btrfsic_dev_state_hashtable_lookup(
3142                                 device->bdev,
3143                                 &btrfsic_dev_state_hashtable);
3144                 if (NULL != ds) {
3145                         state = ds->state;
3146                         btrfsic_dev_state_hashtable_remove(ds);
3147                         btrfsic_dev_state_free(ds);
3148                 }
3149         }
3150
3151         if (NULL == state) {
3152                 printk(KERN_INFO
3153                        "btrfsic: error, cannot find state information"
3154                        " on umount!\n");
3155                 mutex_unlock(&btrfsic_mutex);
3156                 return;
3157         }
3158
3159         /*
3160          * Don't care about keeping the lists' state up to date,
3161          * just free all memory that was allocated dynamically.
3162          * Free the blocks and the block_links.
3163          */
3164         list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
3165                                  all_blocks_node) {
3166                 struct btrfsic_block_link *l, *tmp;
3167
3168                 list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
3169                                          node_ref_to) {
3170                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3171                                 btrfsic_print_rem_link(state, l);
3172
3173                         l->ref_cnt--;
3174                         if (0 == l->ref_cnt)
3175                                 btrfsic_block_link_free(l);
3176                 }
3177
3178                 if (b_all->is_iodone || b_all->never_written)
3179                         btrfsic_block_free(b_all);
3180                 else
3181                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3182                                " @%llu (%s/%llu/%d) on umount which is"
3183                                " not yet iodone!\n",
3184                                btrfsic_get_block_type(state, b_all),
3185                                b_all->logical_bytenr, b_all->dev_state->name,
3186                                b_all->dev_bytenr, b_all->mirror_num);
3187         }
3188
3189         mutex_unlock(&btrfsic_mutex);
3190
3191         kvfree(state);
3192 }