2 * Copyright 2012 Google, Inc.
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
14 * The boot cache device mapper reads a set of contiguously stored sectors.
15 * These sectors are copies of the sectors read during an earlier boot. Only
16 * small reads (less than some number of sectors) are selected for the cache,
17 * since this results in the highest benefit.
19 * The data for the boot cache consists of three sections:
20 * a header, the sector trace and the cache sectors.
21 * These are stored after the file system in the same partition.
23 * The boot cache is created by a separate user process that reads a
24 * sector trace created if the boot cache is invalid.
26 #include <linux/async.h>
27 #include <linux/atomic.h>
28 #include <linux/delay.h>
29 #include <linux/device-mapper.h>
30 #include <linux/kernel.h>
31 #include <linux/module.h>
32 #include <linux/mutex.h>
33 #include <linux/sched.h>
34 #include <linux/slab.h>
35 #include <linux/workqueue.h>
38 #include "dm-bootcache.h"
40 #define DM_MSG_PREFIX "bootcache"
/* Default cap on the number of pages cached in memory. */
42 #define DEFAULT_MAX_PAGES 50000
/* Default size_limit: reads larger than this many sectors bypass the cache. */
43 #define DEFAULT_SIZE_LIMIT 128
/* Default (8K) and hard maximum (1M) number of trace records. */
44 #define DEFAULT_MAX_TRACE (1 << 13)
45 #define MAX_TRACE (1 << 20)
/* The target only needs read access to the underlying device. */
46 #define DEV_MODE FMODE_READ
47 #define SECTOR_SIZE (1 << SECTOR_SHIFT)
48 #define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
/* Maximum length of the cached device's name, including the NUL. */
49 #define MAX_DEVICE_NAME (1 << 8)
/* Completion context for bios issued synchronously by the cache. */
60 struct bootcache_waiter {
61 struct completion completion;
/* Target arguments captured at construction time (see bootcache_ctr). */
65 struct bootcache_args {
66 /* Device being cached. The boot cache also stores its cache here. */
67 char device[MAX_DEVICE_NAME];
69 /* Identifies the data on the device. eg root hex digest from verity */
70 char signature[MAX_SIGNATURE];
72 /* Sector start of cache on device */
75 /* Max num of pages to cache */
78 /* Reads this size or larger will not be cached */
81 /* Maximum number of trace records to collect */
/* Counters reported through dm status and periodic DMINFO logging. */
85 struct bootcache_stats {
86 unsigned num_requests; /* Read requests */
87 unsigned num_hits; /* Number of hits */
88 unsigned overlapped; /* Blocks used while reading rest */
/* One cached page worth of sectors, chained into a hash bucket. */
91 struct bootcache_page {
92 struct bootcache_page *next;
94 u64 sector; /* first sector in set of sectors in this page */
/* Hash table mapping a starting sector to its cached page entry. */
98 struct bootcache_sector_map {
99 u32 num_buckets; /* Number of buckets for hash */
100 u32 num_pages; /* Number of pages of sectors */
101 struct bootcache_page *pages; /* Cache of pages of sectors */
102 struct bootcache_page *nextpage;/* Next page entry to add */
103 struct bootcache_page **bucket; /* Hash buckets */
/*
 * NOTE(review): the fields below belong to the main struct bootcache;
 * its opening declaration is outside this view.
 */
107 const char *name; /* Taken from device being cached */
108 struct bootcache_stats stats;
109 struct bootcache_args args;
110 sector_t begin; /* Beginning sector of underlying device */
111 sector_t len; /* Length in sectors of underlying device */
112 atomic_t state; /* Cache state - needs atomic read */
113 spinlock_t trace_lock; /* Spin lock for trace table */
114 struct bootcache_trace *trace; /* Trace of blocks read during boot */
115 u32 trace_next; /* Next element to fill for tracing */
116 u32 max_io; /* Max pages we can read/write */
117 bool is_valid; /* The cache is valid */
118 bool is_free; /* The cache data has been freed */
119 struct kref kref; /* Protects in-flight operations */
120 struct dm_target *ti; /* Device in device mapper */
121 struct bio_set *bio_set; /* Set of bios for reading blocks */
122 struct dm_dev *dev; /* Device for both cache and data */
123 struct delayed_work work; /* Work that needs a thread */
124 struct mutex cache_lock; /* Locks everything in cache struct */
125 struct completion init_complete; /* Wait for initialization */
126 struct bootcache_sector_map sectors; /* Table of pages of sectors */
127 /* Sysfs files for managing the block cache */
128 struct bin_attribute valid; /* 1 -> valid 0 -> build cache */
129 struct bin_attribute free; /* Write '1' to free cache */
130 struct bin_attribute header; /* Content for bootcache header */
131 struct bin_attribute blocktrace;/* Trace of blocks accessed */
132 /* Computed hdr to be compared with on disk header. */
133 struct bootcache_hdr hdr;
136 static inline u64 bytes_to_pages(u64 bytes)
138 return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
141 static inline u64 sectors_to_pages(u64 sectors)
143 return sectors >> (PAGE_SHIFT - SECTOR_SHIFT);
146 static inline u64 pages_to_sectors(u64 pages)
148 return pages << (PAGE_SHIFT - SECTOR_SHIFT);
151 static void bootcache_bio_destructor(struct bio *bio)
153 struct bootcache *cache = bio->bi_private;
155 bio_free(bio, cache->bio_set);
158 static inline struct bootcache_page **bootcache_hash(
159 struct bootcache_sector_map *map,
162 return &map->bucket[(u32)sector % map->num_buckets];
/*
 * bootcache_get_chunk finds the cached page whose starting sector
 * equals @sector by searching the sector's hash bucket.
 */
165 static struct bootcache_page *bootcache_get_chunk(
166 struct bootcache_sector_map *map,
169 struct bootcache_page *next;
/* Start at the head of the bucket chain for this sector. */
171 next = *bootcache_hash(map, sector);
173 if (sector == next->sector) {
/*
 * bootcache_new_chunk adds a map entry for @sector, allocating its
 * backing data page; warns when the fixed pool of page entries is
 * exhausted.
 */
184 struct bootcache_page *bootcache_new_chunk(struct bootcache_sector_map *map,
187 struct bootcache_page **bucket = bootcache_hash(map, sector);
188 struct bootcache_page *p;
/* nextpage walks the preallocated entry array; at the end we are full. */
190 if (map->nextpage == &map->pages[map->num_pages]) {
191 DMWARN("block cache full");
195 p->page = alloc_page(GFP_KERNEL);
/*
 * build_sector_map allocates the hash table (1.5 buckets per page)
 * and the array of page entries for @num_pages cached pages.
 */
202 static int build_sector_map(struct bootcache_sector_map *map, u32 num_pages)
204 map->num_pages = num_pages;
205 map->num_buckets = num_pages * 3 / 2;
206 map->bucket = kzalloc(map->num_buckets * sizeof(*map->bucket),
209 DMERR("build_sector_maps kzalloc buckets");
212 map->pages = kzalloc(num_pages * sizeof(*map->pages), GFP_KERNEL);
215 DMERR("build_sector_maps kzalloc pages");
/* Entries are handed out in order starting from the first. */
218 map->nextpage = map->pages;
/*
 * bootcache_free_sector_map releases the data pages of every entry
 * that was actually populated (those before nextpage).
 */
222 static void bootcache_free_sector_map(struct bootcache_sector_map *map)
224 struct bootcache_page *p;
226 for (p = map->pages; p < map->nextpage; p++)
228 __free_pages(p->page, 0);
/*
 * bootcache_create_bin_file registers one sysfs binary file under the
 * dm device's kobject. Mode is 0644 when a write handler is supplied,
 * 0444 (read-only) otherwise.
 */
236 static int bootcache_create_bin_file(struct bootcache *cache,
237 struct bin_attribute *attr, char *name, ssize_t size,
238 ssize_t (*read)(struct file *, struct kobject *,
239 struct bin_attribute *, char *, loff_t, size_t),
240 ssize_t (*write)(struct file *, struct kobject *,
241 struct bin_attribute *, char *, loff_t, size_t))
247 attr->attr.name = name;
248 attr->attr.mode = write ? 0644 : 0444;
253 rc = sysfs_create_bin_file(dm_kobject(dm_table_get_md(
254 cache->ti->table)), attr);
256 DMERR("sysfs_create_bin_file %s: %d", name, rc);
261 * bootcache_remove_bin_file uses the file name as flag
262 * to determine if the sysfs file has been created.
264 static void bootcache_remove_bin_file(struct bootcache *cache,
265 struct bin_attribute *attr)
267 if (attr->attr.name) {
268 sysfs_remove_bin_file(dm_kobject(dm_table_get_md(
269 cache->ti->table)), attr);
/* A NULL name marks the attribute as no longer registered. */
270 attr->attr.name = NULL;
275 * bootcache_remove_all_files removes all the sysfs files
276 * that have been created and only the ones that have been
279 static void bootcache_remove_all_files(struct bootcache *cache)
/* Remove in reverse order of creation; missing files are skipped. */
281 bootcache_remove_bin_file(cache, &cache->blocktrace);
282 bootcache_remove_bin_file(cache, &cache->header);
283 bootcache_remove_bin_file(cache, &cache->free);
284 bootcache_remove_bin_file(cache, &cache->valid);
/*
 * Last-reference kref callback: removes the sysfs files (except
 * "free", whose own write handler drops this reference) and frees
 * the cached sector pages.
 */
287 static void bootcache_free_resources(struct kref *kref)
289 struct bootcache *cache = container_of(kref, struct bootcache,
291 /* Will hang if we try to remove cache->free here */
292 bootcache_remove_bin_file(cache, &cache->blocktrace);
293 bootcache_remove_bin_file(cache, &cache->header);
294 bootcache_remove_bin_file(cache, &cache->valid);
295 bootcache_free_sector_map(&cache->sectors);
301 * bootcache_get_ino returns the inode number of the bio if it has one.
302 * If not, it returns 0, an illegal inode number.
303 * When the bio is sent down for I/O, these fields don't change
304 * while the I/O is pending.
306 static unsigned long bootcache_get_ino(struct bio *bio)
/* Walk bio -> first bio_vec -> page -> mapping -> host, checking each link. */
312 if (!bio->bi_io_vec->bv_page)
314 if (!bio->bi_io_vec->bv_page->mapping)
316 if (!bio->bi_io_vec->bv_page->mapping->host)
318 return bio->bi_io_vec->bv_page->mapping->host->i_ino;
/*
 * bootcache_record appends one trace record (sector, count, inode)
 * for @bio. Records are silently dropped once the trace buffer holds
 * max_trace entries.
 */
321 static void bootcache_record(struct bootcache *cache, struct bio *bio)
323 u64 sector = bio->bi_sector;
324 u64 count = to_sector(bio->bi_size);
325 struct bootcache_trace *tr;
329 spin_lock(&cache->trace_lock);
330 if (cache->trace_next < cache->args.max_trace) {
331 tr = &cache->trace[cache->trace_next];
334 tr->ino = bootcache_get_ino(bio);
337 spin_unlock(&cache->trace_lock);
/*
 * is_in_cache checks that every page-sized piece of @bio is present
 * in the sector map; bumps the hit counter when the whole bio is
 * covered.
 */
340 static bool is_in_cache(struct bootcache *cache, struct bio *bio)
342 u64 sector = bio->bi_sector;
343 u32 count = bytes_to_pages(bio->bi_size);
346 for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
347 if (!bootcache_get_chunk(&cache->sectors, sector))
350 ++cache->stats.num_hits;
/*
 * bootcache_read_from_cache copies each page of @bio from the
 * in-memory cache and completes the bio successfully. The caller
 * must already have verified presence via is_in_cache().
 */
354 static void bootcache_read_from_cache(struct bootcache *cache, struct bio *bio)
356 struct bootcache_page *bp;
357 u64 sector = bio->bi_sector;
358 u32 count = bytes_to_pages(bio->bi_size);
363 for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
364 bp = bootcache_get_chunk(&cache->sectors, sector);
367 * Should have found it because we just
368 * looked for it before calling this code
370 DMCRIT("Didn't find block %llx", sector);
373 dst = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
374 src = kmap_atomic(bp->page);
375 memcpy(dst, src, PAGE_SIZE);
/* Complete the bio ourselves; it never reaches the device. */
379 set_bit(BIO_UPTODATE, &bio->bi_flags);
380 bio->bi_end_io(bio, 0);
/*
 * bootcache_read dispatches a read depending on the cache state:
 * large reads and BC_BYPASS go straight to the device; during
 * initialization the request waits on init_complete; while tracing
 * the request is recorded then passed through; otherwise cache hits
 * are served from memory. Every 1000th request logs the hit rate.
 * A kref is held across the stateful paths so teardown waits for us.
 */
383 static void bootcache_read(struct bootcache *cache, struct bio *bio)
387 bio->bi_bdev = cache->dev->bdev;
388 /* Only record reads below the given size */
389 if ((atomic_read(&cache->state) == BC_BYPASS) ||
390 (to_sector(bio->bi_size) > cache->args.size_limit)) {
391 generic_make_request(bio);
394 kref_get(&cache->kref);
396 state = atomic_read(&cache->state);
399 wait_for_completion(&cache->init_complete);
402 bootcache_record(cache, bio);
403 generic_make_request(bio);
406 ++cache->stats.overlapped;
409 if (is_in_cache(cache, bio))
410 bootcache_read_from_cache(cache, bio);
412 generic_make_request(bio);
415 generic_make_request(bio);
418 DMCRIT("unknown state %d", state);
422 ++cache->stats.num_requests;
423 if (cache->stats.num_requests % 1000 == 0) {
424 DMINFO("hits = %u / %u",
425 cache->stats.num_hits,
426 cache->stats.num_requests);
428 kref_put(&cache->kref, bootcache_free_resources);
/* sysfs read handler for "valid": returns '1' or '0' for cache validity. */
431 static ssize_t valid_read(struct file *file, struct kobject *kobp,
432 struct bin_attribute *bin_attr, char *buf,
433 loff_t pos, size_t count)
435 struct bootcache *cache = container_of(bin_attr, struct bootcache,
438 if (pos > 0 || count == 0)
440 buf[0] = cache->is_valid ? '1' : '0';
/* sysfs read handler for "free": returns '1' once the cache data is freed. */
444 static ssize_t free_read(struct file *file, struct kobject *kobp,
445 struct bin_attribute *bin_attr, char *buf,
446 loff_t pos, size_t count)
448 struct bootcache *cache = container_of(bin_attr, struct bootcache,
451 if (pos > 0 || count == 0)
453 buf[0] = cache->is_free ? '1' : '0';
/*
 * sysfs write handler for "free": switches the cache into BC_BYPASS
 * and drops the construction reference so the cached pages can be
 * reclaimed once in-flight requests drain. Repeat writes are detected
 * via is_free under cache_lock.
 */
457 static ssize_t free_write(struct file *file, struct kobject *kobp,
458 struct bin_attribute *bin_attr, char *buf,
459 loff_t pos, size_t count)
461 struct bootcache *cache = container_of(bin_attr, struct bootcache,
465 mutex_lock(&cache->cache_lock);
466 if (cache->is_free) {
470 atomic_set(&cache->state, BC_BYPASS);
472 * Once BC_BYPASS is set, the system
473 * should drain quickly.
475 kref_put(&cache->kref, bootcache_free_resources);
477 /* Tell caller we wrote everything */
480 mutex_unlock(&cache->cache_lock);
/* sysfs read handler for "header": exposes the computed bootcache header. */
484 static ssize_t header_read(struct file *file, struct kobject *kobp,
485 struct bin_attribute *bin_attr, char *buf,
486 loff_t pos, size_t count)
488 struct bootcache *cache = container_of(bin_attr, struct bootcache,
491 return memory_read_from_buffer(buf, count, &pos, &cache->hdr,
/*
 * sysfs read handler for "blocktrace": copies out the trace records
 * collected so far. Only meaningful in BC_TRACING; holds a kref so
 * the trace buffer cannot be freed mid-read.
 */
495 static ssize_t blocktrace_read(struct file *file, struct kobject *kobp,
496 struct bin_attribute *bin_attr, char *buf,
497 loff_t pos, size_t count)
499 struct bootcache *cache = container_of(bin_attr, struct bootcache,
505 kref_get(&cache->kref);
506 if (atomic_read(&cache->state) != BC_TRACING) {
510 data = (char *)cache->trace;
/* Snapshot the current record count under the trace lock. */
512 spin_lock(&cache->trace_lock);
513 next = cache->trace_next;
514 spin_unlock(&cache->trace_lock);
516 size = next * sizeof(struct bootcache_trace);
518 err = memory_read_from_buffer(buf, count, &pos, data, size);
520 kref_put(&cache->kref, bootcache_free_resources);
/*
 * bootcache_init_sysfs creates the four control files (valid, free,
 * header, blocktrace); on failure it removes whatever was created.
 */
524 static int bootcache_init_sysfs(struct bootcache *cache, struct dm_target *ti)
528 rc = bootcache_create_bin_file(cache, &cache->valid, "valid",
529 3, valid_read, NULL);
532 rc = bootcache_create_bin_file(cache, &cache->free, "free",
533 3, free_read, free_write);
536 rc = bootcache_create_bin_file(cache, &cache->header, "header",
537 sizeof(cache->hdr), header_read, NULL);
540 rc = bootcache_create_bin_file(cache, &cache->blocktrace, "blocktrace",
541 cache->args.max_trace * sizeof(struct bootcache_trace),
542 blocktrace_read, NULL);
547 bootcache_remove_all_files(cache);
/* bio completion for cache-fill reads: record any error, wake the waiter. */
551 static void bootcache_read_sectors_end(struct bio *bio, int error)
553 struct bootcache_waiter *waiter = bio->bi_private;
555 if (unlikely(error)) {
556 waiter->error = error;
557 DMERR("Error occurred in bootcache_read_sectors:"
559 error, (u64)bio->bi_sector, bio->bi_size);
561 complete(&waiter->completion);
/*
 * bootcache_read_sectors fills the sector map's pages from disk.
 * The cached data follows the header and meta sectors at cache_start.
 * Reads are issued in chunks of at most max_io pages per bio and
 * each chunk is waited on synchronously. On success the state moves
 * to BC_FILLED.
 */
564 static int bootcache_read_sectors(struct bootcache *cache)
566 struct bootcache_waiter waiter;
568 struct bootcache_page *p;
569 struct bootcache_page *start_page;
570 struct bio_vec *bvec;
571 sector_t sector = cache->args.cache_start + cache->hdr.sectors_meta +
573 u32 max_io = cache->max_io;
574 u32 numpages = cache->sectors.num_pages;
575 u32 chunks_to_read = (numpages + max_io - 1) / max_io;
/* One bio is reused for every chunk. */
580 bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set);
581 if (unlikely(!bio)) {
582 DMERR("Out of memory bio_alloc_bioset");
585 bio->bi_private = &waiter;
586 bio->bi_destructor = bootcache_bio_destructor;
587 p = cache->sectors.pages;
588 for (i = 0; i < chunks_to_read; i++) {
590 bio->bi_bdev = cache->dev->bdev;
591 bio->bi_end_io = bootcache_read_sectors_end;
593 bio->bi_sector = sector;
594 bvec = bio->bi_io_vec;
/* Point the bio's io_vec at the map's own pages; stop at nextpage. */
596 for (j = 0; j < max_io; j++, bvec++, p++) {
597 if (p == cache->sectors.nextpage)
599 bvec->bv_page = p->page;
601 bvec->bv_len = PAGE_SIZE;
603 bio->bi_size = j * PAGE_SIZE;
606 init_completion(&waiter.completion);
608 generic_make_request(bio);
609 wait_for_completion(&waiter.completion);
615 for (j = 0; j < max_io; j++, p++) {
616 if (p == cache->sectors.nextpage)
620 sector += pages_to_sectors(j);
/* Hand bio ownership back to the cache so the destructor can free it. */
622 bio->bi_private = cache;
624 atomic_set(&cache->state, BC_FILLED);
/* bio completion for bootcache_dev_read: record any error, wake the waiter. */
628 static void bootcache_dev_read_end(struct bio *bio, int error)
630 struct bootcache_waiter *waiter = bio->bi_private;
632 if (unlikely(error)) {
633 waiter->error = error;
634 DMERR("Error occurred in bootcache_dev_read: %d (%llx, %x)",
635 error, (u64)bio->bi_sector, bio->bi_size);
637 complete(&waiter->completion);
/*
 * bootcache_dev_read synchronously reads @len bytes from the device
 * into @data, using temporary pages as the bio payload and copying
 * the result out afterwards, at most max_io pages per bio.
 */
640 static int bootcache_dev_read(struct bootcache *cache, void *data,
643 struct bootcache_waiter waiter;
645 struct bio_vec *bvec;
646 int pages_to_read = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
647 int max_io = cache->max_io;
/* Allocate only as many scratch pages as we will actually use. */
656 if (pages_to_read < max_io)
657 num_pages = pages_to_read;
660 bio = bio_alloc_bioset(GFP_KERNEL, num_pages, cache->bio_set);
661 if (unlikely(!bio)) {
662 DMERR("Out of memory bio_alloc_bioset");
665 bvec = bio->bi_io_vec;
666 for (i = 0; i < num_pages; i++, bvec++)
667 bvec->bv_page = alloc_page(GFP_KERNEL);
668 bio->bi_private = &waiter;
669 bio->bi_destructor = bootcache_bio_destructor;
/* Last chunk may be smaller than max_io. */
672 if (pages_to_read < max_io)
673 max_io = pages_to_read;
675 bio->bi_bdev = cache->dev->bdev;
676 bio->bi_end_io = bootcache_dev_read_end;
678 bio->bi_sector = sector;
679 bvec = bio->bi_io_vec;
680 for (i = 0; i < max_io; i++, bvec++) {
682 bvec->bv_len = PAGE_SIZE;
684 pages_to_read -= max_io;
685 bio->bi_size = max_io * PAGE_SIZE;
686 bio->bi_vcnt = max_io;
688 init_completion(&waiter.completion);
690 generic_make_request(bio);
691 wait_for_completion(&waiter.completion);
/* Copy the read pages into the caller's buffer, clipping the tail. */
696 for (i = 0; i < max_io; i++) {
697 bytes_to_copy = (len < PAGE_SIZE) ? len : PAGE_SIZE;
698 src = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
699 memcpy(dst, src, bytes_to_copy);
701 len -= bytes_to_copy;
704 dst += bytes_to_copy;
706 sector += pages_to_sectors(max_io);
709 bvec = bio->bi_io_vec;
710 for (i = 0; i < num_pages; i++, bvec++)
711 __free_pages(bvec->bv_page, 0);
712 bio->bi_private = cache;
/*
 * is_valid_hdr checks an on-disk header against the header computed
 * at construction: magic, version, queue limits, build date/time and
 * signature must all match, and the meta/data sector counts must fit
 * in the space after cache_start on the device.
 */
717 static int is_valid_hdr(struct bootcache *cache, struct bootcache_hdr *hdr)
720 u64 max_meta_sectors;
722 if (hdr->magic != BOOTCACHE_MAGIC)
724 if (hdr->version != BOOTCACHE_VERSION)
726 if (hdr->max_sectors != cache->hdr.max_sectors)
728 if (hdr->max_hw_sectors != cache->hdr.max_hw_sectors)
730 if (strncmp(hdr->date, __DATE__, strlen(__DATE__) + 1) != 0)
732 if (strncmp(hdr->time, __TIME__, strlen(__TIME__) + 1) != 0)
734 if (strncmp(hdr->signature, cache->hdr.signature,
735 sizeof(hdr->signature)) != 0)
739 * Can't have any more meta sectors than it takes to map
740 * the remaining partition space for bootcache.
742 max_sectors = to_sector(i_size_read(cache->dev->bdev->bd_inode))
743 - cache->args.cache_start;
744 max_meta_sectors = to_sector(round_up(
745 sectors_to_pages(max_sectors) * sizeof(u64), SECTOR_SIZE));
746 if (hdr->sectors_meta > max_meta_sectors) {
747 DMERR("too many meta sectors %lld", (u64)hdr->sectors_meta);
750 if (hdr->sectors_data > max_sectors - hdr->sectors_meta - 1) {
751 DMERR("bootcache too big %lld", (u64)hdr->sectors_data);
/*
 * read_trace loads the recorded sector trace from disk (it follows
 * the header page) and inserts a map entry for each page-sized piece
 * of every traced extent.
 */
757 static int read_trace(struct bootcache *cache)
765 size_trace = sizeof(*cache->trace) * cache->hdr.num_trace_recs;
766 cache->trace = kzalloc(size_trace, GFP_KERNEL);
768 DMERR("read_trace out of memory");
771 rc = bootcache_dev_read(cache, cache->trace, size_trace,
772 cache->hdr.sector + SECTORS_PER_PAGE);
774 DMERR("bootcache_dev_read trace %d", rc);
777 for (i = 0; i < cache->hdr.num_trace_recs; i++) {
778 struct bootcache_trace *tr;
779 tr = &cache->trace[i];
780 for (j = 0; j < tr->count; j += SECTORS_PER_PAGE) {
781 bootcache_new_chunk(&cache->sectors, tr->sector + j);
791 * Reads the bootcache header from disk, checks if it is valid
793 * read the sector trace from disk
794 * build hash table for sector trace on page boundaries
795 * begin reading in sectors to be cached
797 * setup to capture trace of sectors
799 * on error: by pass boot cache
/* Delayed-work entry point; runs the startup sequence described above. */
801 static void bootcache_start(struct work_struct *work)
803 struct bootcache *cache = container_of(work, struct bootcache,
805 struct bootcache_hdr hdr;
808 rc = bootcache_dev_read(cache, &hdr, sizeof(hdr), cache->hdr.sector);
810 DMERR("bootcache_dev_read hdr %d", rc);
/* Valid header: fill the cache from disk. */
813 if (is_valid_hdr(cache, &hdr)) {
815 memcpy(&cache->hdr, &hdr, sizeof(cache->hdr));
816 rc = build_sector_map(&cache->sectors,
817 sectors_to_pages(cache->hdr.sectors_data));
820 rc = read_trace(cache);
823 atomic_set(&cache->state, BC_FILLING);
824 rc = bootcache_read_sectors(cache);
/* Invalid header: trace this boot so the cache can be rebuilt. */
828 atomic_set(&cache->state, BC_TRACING);
829 cache->trace = kzalloc(sizeof(*cache->trace) *
830 cache->args.max_trace, GFP_KERNEL);
832 DMERR("cache->trace out of memory");
/* Release any reads parked in bootcache_read waiting for init. */
837 complete_all(&cache->init_complete);
840 DMERR("error occured starting bootcache, setting to by pass mode");
841 atomic_set(&cache->state, BC_BYPASS);
847 * bootcache_max_io determines the maximum number of pages that can
848 * be passed in one read request to the underlying device.
849 * @cache: the max_sectors and max_hw_sectors must
851 * @proposed_max_io: maximum number of pages the caller wants
854 * Returns maximum number of pages that can be read but
855 * no more than proposed_max_io
857 static u32 bootcache_max_io(struct bootcache *cache, u32 proposed_max_io)
/* Clamp to the tighter of the device's soft and hardware queue limits. */
862 max_sectors = min(cache->hdr.max_sectors, cache->hdr.max_hw_sectors);
863 max_pages = sectors_to_pages(max_sectors);
864 if (proposed_max_io < max_pages)
865 max_pages = proposed_max_io;
/*
 * bootcache_init_hdr fills the expected header used to validate the
 * on-disk copy: location, magic/version, queue limits, build stamp
 * and the caller-supplied signature.
 */
869 static void bootcache_init_hdr(struct bootcache_hdr *hdr, u64 cache_start,
870 struct block_device *bdev, const char *signature)
872 hdr->sector = cache_start;
873 hdr->magic = BOOTCACHE_MAGIC;
874 hdr->version = BOOTCACHE_VERSION;
875 hdr->state = BC_INIT;
876 hdr->alignment = PAGE_SIZE;
877 hdr->max_hw_sectors = queue_max_hw_sectors(bdev_get_queue(bdev));
878 hdr->max_sectors = queue_max_sectors(bdev_get_queue(bdev));
879 strncpy(hdr->date, __DATE__, sizeof(hdr->date));
880 strncpy(hdr->time, __TIME__, sizeof(hdr->time));
881 strncpy(hdr->signature, signature, sizeof(hdr->signature));
885 * match_dev_by_uuid - callback for finding a partition using its uuid
886 * @dev: device passed in by the caller
887 * @uuid_data: opaque pointer to a uuid packed by part_pack_uuid().
889 * Returns 1 if the device matches, and 0 otherwise.
891 static int match_dev_by_uuid(struct device *dev, void *uuid_data)
893 u8 *uuid = uuid_data;
894 struct hd_struct *part = dev_to_part(dev);
/* Compare the packed UUID against this partition's info block. */
899 if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
908 * dm_get_device_by_uuid: claim a device using its UUID
909 * @ti: current dm_target
910 * @uuid_string: 36 byte UUID hex encoded
911 * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
912 * @dev_start: offset in sectors passed to dm_get_device
913 * @dev_len: length in sectors passed to dm_get_device
914 * @dm_dev: dm_dev to populate
916 * Wraps dm_get_device allowing it to use a unique partition id
917 * to find a given partition on any drive. This code is based on
918 * printk_all_partitions in that it walks all of the registered
921 * N.B., uuid_string is not checked for safety just strlen().
923 static int dm_get_device_by_uuid(struct dm_target *ti, const char *uuid_str,
924 sector_t dev_start, sector_t dev_len,
925 struct dm_dev **dm_dev)
927 struct device *dev = NULL;
929 char devt_buf[BDEVT_SIZE];
931 size_t uuid_length = strlen(uuid_str);
/* A canonical UUID is 36 hex/dash characters. */
933 if (uuid_length < 36)
935 /* Pack the requested UUID in the expected format. */
936 part_pack_uuid(uuid_str, uuid);
938 dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
945 /* The caller may specify +/-%u after the UUID if they want a partition
946 * before or after the one identified.
948 if (uuid_length > 36) {
949 unsigned int part_offset;
951 unsigned minor = MINOR(devt);
/* "+N"/"-N" selects a partition offset from the matched minor. */
952 if (sscanf(uuid_str + 36, "%c%u", &sign, &part_offset) == 2) {
954 minor += part_offset;
955 } else if (sign == '-') {
956 minor -= part_offset;
958 DMWARN("Trailing characters after UUID: %s\n",
961 devt = MKDEV(MAJOR(devt), minor);
965 /* Construct the dev name to pass to dm_get_device. dm_get_device
966 * doesn't support being passed a dev_t.
968 snprintf(devt_buf, sizeof(devt_buf), "%u:%u",
969 MAJOR(devt), MINOR(devt));
971 /* TODO(wad) to make this generic we could also pass in the mode. */
972 if (!dm_get_device(ti, devt_buf, dm_table_get_mode(ti->table), dm_dev))
975 ti->error = "Failed to acquire device";
976 DMDEBUG("Failed to acquire discovered device %s", devt_buf);
979 ti->error = "Bad UUID";
980 DMDEBUG("Supplied value '%s' is an invalid UUID", uuid_str);
983 DMDEBUG("No matching partition for GUID: %s", uuid_str);
984 ti->error = "No matching GUID";
/*
 * bootcache_get_device resolves @devname either directly through
 * dm_get_device or, failing that, by partition UUID — retrying while
 * driver probing is still in progress at boot.
 */
988 static int bootcache_get_device(
989 struct dm_target *ti,
993 struct dm_dev **dm_dev)
996 /* Try the normal path first since if everything is ready, it
997 * will be the fastest.
999 if (!dm_get_device(ti, devname,
1000 dm_table_get_mode(ti->table), dm_dev))
1003 /* Try the device by partition UUID */
1004 if (!dm_get_device_by_uuid(ti, devname, dev_start, dev_len,
1008 /* No need to be too aggressive since this is a slow path. */
1010 } while (driver_probe_done() != 0 || *dm_dev == NULL);
1011 async_synchronize_full();
1016 * bootcache_ctr - Construct a boot cache
1017 * @ti: Target being created
1018 * @argc: Number of elements in argv
1019 * @argv: Vector of arguments - All arguments are positional, this
1020 * means that to set a particular argument, all of its
1021 * predecessors must be present.
1023 * Accepts the following parameters [defaults in brackets]:
1024 * @device: Device being cached. The boot cache is also stored here.
1025 * @cache_start: Sector start on the device for the boot cache.
1026 * @signature: Signature to determine if cache is valid.
1027 * @size_limit: In sectors, max size reads to include in cache [128]
1028 * @max_trace: Number of entries in block trace made during boot [8192]
1029 * @max_pages: Maximum number of pages to cache in memory [50000]
1032 * [<dev> [<cache_start> [<sig> [<size_limit> [<max_trace> [<max_limit>]]]]]]
1035 * 0f5dbd05-c063-a848-a296-b8b8c2c24b28+1 1741200 10e8...78 80 64000 60000
/* Target constructor; the parameters are described in the comment above. */
1037 static int bootcache_ctr(struct dm_target *ti, unsigned argc, char **argv)
1039 struct bootcache *cache = NULL;
1040 const char *signature = NULL;
1041 const char *device = NULL;
1042 u64 cache_start = 0;
1043 u64 max_pages = DEFAULT_MAX_PAGES;
1044 u64 size_limit = DEFAULT_SIZE_LIMIT;
1045 u64 max_trace = DEFAULT_MAX_TRACE;
/* Positional arguments; later ones fall back to the defaults above. */
1051 if (strict_strtoull(argv[1], 10, &cache_start)) {
1052 ti->error = "Invalid cache_start";
1056 signature = argv[2];
1058 if (strict_strtoull(argv[3], 10, &size_limit)) {
1059 ti->error = "Invalid size_limit";
1063 if (strict_strtoull(argv[4], 10, &max_trace)) {
1064 ti->error = "Invalid max_trace";
1068 if (strict_strtoull(argv[5], 10, &max_pages)) {
1069 ti->error = "Invalid max_pages";
1073 #define NEEDARG(n) \
1075 ti->error = "Missing argument: " #n; \
1081 NEEDARG(cache_start);
1084 if ((dm_table_get_mode(ti->table) & DEV_MODE) != DEV_MODE) {
1085 ti->error = "Must be created read only.";
1089 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
1092 init_completion(&cache->init_complete);
1095 strlcpy(cache->args.device, device, sizeof(cache->args.device));
1096 strlcpy(cache->args.signature, signature,
1097 sizeof(cache->args.signature));
1098 cache->args.cache_start = cache_start;
1099 cache->args.max_pages = max_pages;
1100 cache->args.size_limit = size_limit;
/* Clamp the trace size to the hard limit. */
1101 if (max_trace > MAX_TRACE) {
1102 DMWARN("max_trace too large %llu, setting to %d\n",
1103 max_trace, MAX_TRACE);
1104 max_trace = MAX_TRACE;
1106 cache->args.max_trace = max_trace;
1108 cache->begin = ti->begin;
1109 cache->len = ti->len;
1111 atomic_set(&cache->state, BC_INIT);
/* Initial reference; dropped via the sysfs "free" file (free_write). */
1112 kref_init(&cache->kref);
1113 mutex_init(&cache->cache_lock);
1114 spin_lock_init(&cache->trace_lock);
1116 /* For the name, use the device default with / changed to _ */
1117 cache->name = dm_disk(dm_table_get_md(ti->table))->disk_name;
1119 if (bootcache_init_sysfs(cache, ti))
1122 rc = bootcache_get_device(ti, device,
1123 ti->begin, ti->len, &cache->dev);
1125 DMERR("Failed to acquire device '%s': %d", device, rc);
1126 ti->error = "Device lookup failed";
1130 bootcache_init_hdr(&cache->hdr, cache_start,
1131 cache->dev->bdev, signature);
1132 cache->max_io = bootcache_max_io(cache, BIO_MAX_PAGES);
1134 /* Allocate the bioset used for request padding */
1135 cache->bio_set = bioset_create(cache->max_io * 4, 0);
1136 if (!cache->bio_set) {
1137 ti->error = "Cannot allocate verity bioset";
1141 ti->num_flush_requests = 1;
1142 ti->private = cache;
1145 char vdev[BDEVNAME_SIZE];
1146 bdevname(cache->dev->bdev, vdev);
1147 DMINFO("dev:%s", vdev);
/* The heavy startup work (disk reads) runs from a workqueue. */
1149 INIT_WORK(&cache->work.work, bootcache_start);
1150 schedule_work(&cache->work.work);
1152 DMINFO("cache:%p", cache);
/* Error unwinding below releases resources in reverse order. */
1156 dm_put_device(ti, cache->dev);
1158 bootcache_remove_all_files(cache);
1160 kfree(cache); /* hash is not secret so no need to zero */
/* dm status callback: INFO reports counters, TABLE reports the ctr args. */
1165 static int bootcache_status(struct dm_target *ti, status_type_t type,
1166 char *result, uint maxlen)
1168 struct bootcache *cache = (struct bootcache *) ti->private;
1170 char vdev[BDEVNAME_SIZE];
1173 case STATUSTYPE_INFO:
1175 cache->stats.num_requests,
1176 cache->stats.num_hits,
1177 cache->stats.overlapped);
1180 case STATUSTYPE_TABLE:
1181 bdevname(cache->dev->bdev, vdev);
1182 DMEMIT("/dev/%s signature=%s cache_start=%llu max_pages=%llu"
1183 " size_limit=%llu max_trace=%llu\n",
1185 cache->args.signature,
1186 cache->args.cache_start,
1187 cache->args.max_pages,
1188 cache->args.size_limit,
1189 cache->args.max_trace);
/* Target destructor: release the bio_set and the underlying device. */
1195 static void bootcache_dtr(struct dm_target *ti)
1198 * Doesn't have to clean-up the meta files in sysfs
1199 * because the device mapper has already done it.
1201 struct bootcache *cache = (struct bootcache *)ti->private;
1203 DMDEBUG("Destroying bio set");
1204 bioset_free(cache->bio_set);
1206 DMDEBUG("Putting dev");
1207 dm_put_device(ti, cache->dev);
1209 DMDEBUG("Destroying config");
1213 static int bootcache_map(struct dm_target *ti, struct bio *bio,
1214 union map_info *map_context)
1216 bootcache_read(ti->private, bio);
1217 return DM_MAPIO_SUBMITTED;
/*
 * bootcache_merge forwards merge decisions to the underlying device's
 * queue, translating the sector into the underlying device's space.
 */
1220 static int bootcache_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
1221 struct bio_vec *biovec, int max_size)
1223 struct bootcache *cache = ti->private;
1224 struct request_queue *q = bdev_get_queue(cache->dev->bdev);
1226 if (!q->merge_bvec_fn)
1229 bvm->bi_bdev = cache->dev->bdev;
1230 bvm->bi_sector = cache->begin +
1231 bvm->bi_sector - ti->begin;
1233 /* Optionally, this could just return 0 to stick to single pages. */
1234 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
1237 static int bootcache_iterate_devices(struct dm_target *ti,
1238 iterate_devices_callout_fn fn, void *data)
1240 struct bootcache *cache = ti->private;
1242 return fn(ti, cache->dev, cache->begin, ti->len, data);
1245 static void bootcache_io_hints(struct dm_target *ti,
1246 struct queue_limits *limits)
1248 limits->logical_block_size = PAGE_SIZE;
1249 limits->physical_block_size = PAGE_SIZE;
1250 blk_limits_io_min(limits, PAGE_SIZE);
/* Device-mapper target registration table. */
1253 static struct target_type bootcache_target = {
1254 .name = "bootcache",
1255 .version = {0, 1, 0},
1256 .module = THIS_MODULE,
1257 .ctr = bootcache_ctr,
1258 .dtr = bootcache_dtr,
1259 .map = bootcache_map,
1260 .merge = bootcache_merge,
1261 .status = bootcache_status,
1262 .iterate_devices = bootcache_iterate_devices,
1263 .io_hints = bootcache_io_hints,
/* Module init: register the bootcache target and log the version. */
1266 static int __init dm_bootcache_init(void)
1270 rc = dm_register_target(&bootcache_target);
1272 DMERR("register failed %d", rc);
1273 goto register_failed;
1276 DMINFO("version %u.%u.%u loaded", bootcache_target.version[0],
1277 bootcache_target.version[1], bootcache_target.version[2]);
1285 static void __exit dm_bootcache_exit(void)
1287 dm_unregister_target(&bootcache_target);
1290 module_init(dm_bootcache_init);
1291 module_exit(dm_bootcache_exit);
1293 MODULE_AUTHOR("Paul Taysom <taysom@chromium.org>");
1294 MODULE_DESCRIPTION(DM_NAME "read cache");
1295 MODULE_LICENSE("GPL");