2 * Copyright 2012 Google, Inc.
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
14 * The boot cache device mapper reads a set of contiguously stored sectors.
15 * These sectors are copies of the sectors read during an earlier boot. Only
16 * small reads (less than some number of sectors) are selected for the cache,
17 * since this results in the highest benefit.
19 * The data for the boot cache consists of three sections:
20 * a header, the sector trace and the cache sectors.
21 * These are stored after the file system in the same partition.
23 * The boot cache is created by a separate user process that reads a
24 * sector trace, which is created when the boot cache is invalid.
26 #include <linux/async.h>
27 #include <linux/atomic.h>
28 #include <linux/delay.h>
29 #include <linux/device-mapper.h>
30 #include <linux/kernel.h>
31 #include <linux/module.h>
32 #include <linux/mutex.h>
33 #include <linux/sched.h>
34 #include <linux/slab.h>
35 #include <linux/workqueue.h>
38 #include "dm-bootcache.h"
40 #define DM_MSG_PREFIX "bootcache"
/* Default cap on pages held in memory (ctr max_pages argument) */
42 #define DEFAULT_MAX_PAGES 50000
/* Default size_limit in sectors; reads larger than this bypass the cache */
43 #define DEFAULT_SIZE_LIMIT 128
/* Default and hard upper bound on number of bootcache_trace records */
44 #define DEFAULT_MAX_TRACE (1 << 13)
45 #define MAX_TRACE (1 << 20)
/* The cached device is opened read-only */
46 #define DEV_MODE FMODE_READ
47 #define SECTOR_SIZE (1 << SECTOR_SHIFT)
48 #define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
/* Max length of the cached-device name string, including NUL */
49 #define MAX_DEVICE_NAME (1 << 8)
/*
 * Synchronous-I/O helper: the submitter waits on @completion and the
 * bio end_io callback completes it.
 * NOTE(review): an 'error' field is written by the end_io handlers
 * (see bootcache_read_sectors_end) but its declaration is not visible
 * in this excerpt -- presumably declared just below.
 */
60 struct bootcache_waiter {
61 struct completion completion;
/* Arguments passed to the target constructor, as parsed by bootcache_ctr. */
65 struct bootcache_args {
66 /* Device being cached. The boot cache also stores its cache here. */
67 char device[MAX_DEVICE_NAME];
69 /* Identifies the data on the device. eg root hex digest from verity */
70 char signature[MAX_SIGNATURE];
72 /* Sector start of cache on device */
/* NOTE(review): the field declarations for cache_start, max_pages,
 * size_limit and max_trace are not visible in this excerpt; only their
 * comments survive.  bootcache_ctr assigns all four (u64-sized values). */
75 /* Max num of pages to cache */
78 /* Reads this size or larger will not be cached */
81 /* Maximum number of trace records to collect */
/* Runtime counters reported by bootcache_status (STATUSTYPE_INFO). */
85 struct bootcache_stats {
86 unsigned num_requests; /* Read requests */
87 unsigned num_hits; /* Number of hits */
88 unsigned overlapped; /* Blocks used while reading rest */
/*
 * One cached page of sectors, chained in a hash bucket.
 * NOTE(review): a 'struct page *page' member (used by alloc_page /
 * __free_pages callers) is not visible in this excerpt.
 */
91 struct bootcache_page {
92 struct bootcache_page *next;
94 u64 sector; /* first sector in set of sectors in this page */
/* Hash of cached pages keyed by the first sector stored in each page. */
98 struct bootcache_sector_map {
99 u32 num_buckets; /* Number of buckets for hash */
100 u32 num_pages; /* Number of pages of sectors */
101 struct bootcache_page *pages; /* Cache of pages of sectors */
102 struct bootcache_page *nextpage;/* Next page entry to add */
103 struct bootcache_page **bucket; /* Hash buckets */
/* NOTE(review): these are the members of 'struct bootcache', the per-target
 * state object; the opening 'struct bootcache {' line is not visible in
 * this excerpt. */
107 const char *name; /* Taken from device being cached */
108 struct bootcache_stats stats;
109 struct bootcache_args args;
110 sector_t begin; /* Beginning sector of underlying device */
111 sector_t len; /* Length in sectors of underlying device */
112 atomic_t state; /* Cache state - needs atomic read */
113 spinlock_t trace_lock; /* Spin lock for trace table */
114 struct bootcache_trace *trace; /* Trace of blocks read during boot */
115 u32 trace_next; /* Next element to fill for tracing */
116 u32 max_io; /* Max pages we can read/write */
117 bool is_valid; /* The cache is valid */
118 bool is_free; /* The cache data has been freed */
119 struct kref kref; /* Protects in-flight operations */
120 struct dm_target *ti; /* Device in device mapper */
121 struct bio_set *bio_set; /* Set of bios for reading blocks */
122 struct dm_dev *dev; /* Device for both cache and data */
123 struct delayed_work work; /* Work that needs a thread */
124 struct mutex cache_lock; /* Locks everything in cache struct */
125 struct completion init_complete; /* Wait for initialization */
126 struct bootcache_sector_map sectors; /* Table of pages of sectors */
127 /* Sysfs files for managing the block cache */
128 struct bin_attribute valid; /* 1 -> valid 0 -> build cache */
129 struct bin_attribute free; /* Write '1' to free cache */
130 struct bin_attribute header; /* Content for bootcache header */
131 struct bin_attribute blocktrace;/* Trace of blocks accessed */
132 /* Computed hdr to be compared with on disk header. */
133 struct bootcache_hdr hdr;
136 static inline u64 bytes_to_pages(u64 bytes)
138 return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
141 static inline u64 sectors_to_pages(u64 sectors)
143 return sectors >> (PAGE_SHIFT - SECTOR_SHIFT);
146 static inline u64 pages_to_sectors(u64 pages)
148 return pages << (PAGE_SHIFT - SECTOR_SHIFT);
151 static void bootcache_bio_destructor(struct bio *bio)
153 struct bootcache *cache = bio->bi_private;
155 bio_free(bio, cache->bio_set);
158 static inline struct bootcache_page **bootcache_hash(
159 struct bootcache_sector_map *map,
162 return &map->bucket[(u32)sector % map->num_buckets];
/*
 * Look up the cached page whose first sector is @sector.
 * NOTE(review): the bucket-chain walk and the return paths are not
 * visible in this excerpt; only the head lookup and the sector-match
 * test survive.  Callers (is_in_cache, bootcache_read_from_cache)
 * treat NULL as "not cached".
 */
165 static struct bootcache_page *bootcache_get_chunk(
166 struct bootcache_sector_map *map,
169 struct bootcache_page *next;
171 next = *bootcache_hash(map, sector);
173 if (sector == next->sector) {
/*
 * Allocate the next bootcache_page slot for @sector and link it into
 * its hash bucket.  Fails (with a warning) once all num_pages slots
 * are consumed.
 * NOTE(review): assignment of 'p' from map->nextpage, the bucket
 * linking, and the return paths are not visible in this excerpt.
 */
184 struct bootcache_page *bootcache_new_chunk(struct bootcache_sector_map *map,
187 struct bootcache_page **bucket = bootcache_hash(map, sector);
188 struct bootcache_page *p;
/* Cache full: every preallocated page entry has been handed out */
190 if (map->nextpage == &map->pages[map->num_pages]) {
191 DMWARN("block cache full");
195 p->page = alloc_page(GFP_KERNEL);
/*
 * Allocate the page table and hash buckets for @num_pages cached pages.
 * Bucket count is 1.5x the page count to keep chains short.
 * NOTE(review): the allocation-failure return paths are truncated in
 * this excerpt; the DMERR lines indicate each kzalloc is checked.
 * NOTE(review): num_pages * 3 / 2 and the kzalloc size products are
 * unchecked u32 multiplications -- overflow only for implausibly
 * large page counts, but worth confirming against callers.
 */
202 static int build_sector_map(struct bootcache_sector_map *map, u32 num_pages)
204 map->num_pages = num_pages;
205 map->num_buckets = num_pages * 3 / 2;
206 map->bucket = kzalloc(map->num_buckets * sizeof(*map->bucket),
209 DMERR("build_sector_maps kzalloc buckets");
212 map->pages = kzalloc(num_pages * sizeof(*map->pages), GFP_KERNEL);
215 DMERR("build_sector_maps kzalloc pages");
218 map->nextpage = map->pages;
/*
 * Release every page allocated into the sector map.  Only entries up
 * to nextpage ever received a page from bootcache_new_chunk.
 * NOTE(review): the kfree of map->pages and map->bucket is not visible
 * in this excerpt -- presumably follows the loop.
 */
222 static void bootcache_free_sector_map(struct bootcache_sector_map *map)
224 struct bootcache_page *p;
226 for (p = map->pages; p < map->nextpage; p++)
228 __free_pages(p->page, 0);
/*
 * Create one sysfs binary file under this target's device-mapper
 * kobject.  @attr->attr.name doubles as the "created" flag consumed by
 * bootcache_remove_bin_file.  Mode is 0644 when a write handler is
 * supplied, else 0444.
 * NOTE(review): the attr size/read/write assignments and the return
 * path are not visible in this excerpt.
 */
236 static int bootcache_create_bin_file(struct bootcache *cache,
237 struct bin_attribute *attr, char *name, ssize_t size,
238 ssize_t (*read)(struct file *, struct kobject *,
239 struct bin_attribute *, char *, loff_t, size_t),
240 ssize_t (*write)(struct file *, struct kobject *,
241 struct bin_attribute *, char *, loff_t, size_t))
247 attr->attr.name = name;
248 attr->attr.mode = write ? 0644 : 0444;
253 rc = sysfs_create_bin_file(dm_kobject(dm_table_get_md(
254 cache->ti->table)), attr);
256 DMERR("sysfs_create_bin_file %s: %d", name, rc);
261 * bootcache_remove_bin_file uses the file name as flag
262 * to determine if the sysfs file has been created.
264 static void bootcache_remove_bin_file(struct bootcache *cache,
265 struct bin_attribute *attr)
267 if (attr->attr.name) {
268 sysfs_remove_bin_file(dm_kobject(dm_table_get_md(
269 cache->ti->table)), attr);
270 attr->attr.name = NULL;
275 * bootcache_remove_all_files removes all the sysfs files
276 * that have been created and only the ones that have been
279 static void bootcache_remove_all_files(struct bootcache *cache)
281 bootcache_remove_bin_file(cache, &cache->blocktrace);
282 bootcache_remove_bin_file(cache, &cache->header);
283 bootcache_remove_bin_file(cache, &cache->free);
284 bootcache_remove_bin_file(cache, &cache->valid);
/*
 * kref release callback: runs when the last in-flight reference is
 * dropped.  Removes the sysfs files (except 'free' -- removing the file
 * from its own write handler would deadlock, see free_write) and frees
 * the sector map.
 * NOTE(review): the tail of this function (freeing the trace buffer,
 * closing brace) is not visible in this excerpt.
 */
287 static void bootcache_free_resources(struct kref *kref)
289 struct bootcache *cache = container_of(kref, struct bootcache,
291 /* Will hang if we try to remove cache->free here */
292 bootcache_remove_bin_file(cache, &cache->blocktrace);
293 bootcache_remove_bin_file(cache, &cache->header);
294 bootcache_remove_bin_file(cache, &cache->valid);
295 bootcache_free_sector_map(&cache->sectors);
/*
 * bootcache_get_ino returns the inode number of the bio if it has one.
 * If not, it returns 0, an illegal inode number.
 * When the bio is sent down for I/O, these fields don't change
 * while the I/O is pending.
 * NOTE(review): each guard presumably returns 0; those return lines
 * are not visible in this excerpt.  A NULL check on bio itself /
 * bi_io_vec is also not visible here.
 */
306 static unsigned long bootcache_get_ino(struct bio *bio)
312 if (!bio->bi_io_vec->bv_page)
314 if (!bio->bi_io_vec->bv_page->mapping)
316 if (!bio->bi_io_vec->bv_page->mapping->host)
318 return bio->bi_io_vec->bv_page->mapping->host->i_ino;
/*
 * Append one trace record (sector, count, inode) for @bio under
 * trace_lock.  Records are silently dropped once trace_next reaches
 * args.max_trace.
 * NOTE(review): the assignments of tr->sector/tr->count and the
 * trace_next increment are not visible in this excerpt.
 */
321 static void bootcache_record(struct bootcache *cache, struct bio *bio)
323 u64 sector = bio->bi_sector;
324 u64 count = to_sector(bio->bi_size);
325 struct bootcache_trace *tr;
329 spin_lock(&cache->trace_lock);
330 if (cache->trace_next < cache->args.max_trace) {
331 tr = &cache->trace[cache->trace_next];
334 tr->ino = bootcache_get_ino(bio);
337 spin_unlock(&cache->trace_lock);
/*
 * True if every page-sized chunk spanned by @bio is present in the
 * sector map; bumps the hit counter on success.
 * NOTE(review): the 'return false' inside the loop and the final
 * 'return true' are not visible in this excerpt.  num_hits is updated
 * without the cache mutex -- presumably acceptable for a statistic.
 */
340 static bool is_in_cache(struct bootcache *cache, struct bio *bio)
342 u64 sector = bio->bi_sector;
343 u32 count = bytes_to_pages(bio->bi_size);
346 for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
347 if (!bootcache_get_chunk(&cache->sectors, sector))
350 ++cache->stats.num_hits;
/*
 * Satisfy @bio entirely from the in-memory cache: copy each cached
 * page into the bio's pages, then complete the bio as successful.
 * Callers must have checked is_in_cache() first, hence the DMCRIT on
 * a miss here.
 * NOTE(review): the kunmap_atomic calls matching the two kmap_atomic
 * calls are not visible in this excerpt.
 */
354 static void bootcache_read_from_cache(struct bootcache *cache, struct bio *bio)
356 struct bootcache_page *bp;
357 u64 sector = bio->bi_sector;
358 u32 count = bytes_to_pages(bio->bi_size);
363 for (i = 0; i < count; i++, sector += SECTORS_PER_PAGE) {
364 bp = bootcache_get_chunk(&cache->sectors, sector);
367 * Should have found it because we just
368 * looked for it before calling this code
370 DMCRIT("Didn't find block %llx", sector);
373 dst = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
374 src = kmap_atomic(bp->page);
375 memcpy(dst, src, PAGE_SIZE);
/* Complete the bio ourselves: it never reaches the underlying device */
379 set_bit(BIO_UPTODATE, &bio->bi_flags);
380 bio->bi_end_io(bio, 0);
/*
 * Main read dispatcher, called from bootcache_map for every bio.
 * Fast path: in BC_BYPASS state, or for reads larger than size_limit,
 * pass the bio straight to the underlying device.  Otherwise take a
 * kref (so teardown waits for us) and act on the cache state:
 * apparently BC_INIT waits for init_complete, BC_TRACING records the
 * read and passes it down, BC_FILLING counts it as overlapped,
 * BC_FILLED serves from cache when possible.
 * NOTE(review): the switch/case structure and several fall-through /
 * goto paths are not visible in this excerpt -- the mapping of states
 * to actions above is inferred and should be confirmed.
 */
383 static void bootcache_read(struct bootcache *cache, struct bio *bio)
387 bio->bi_bdev = cache->dev->bdev;
388 /* Only record reads below the given size */
389 if ((atomic_read(&cache->state) == BC_BYPASS) ||
390 (to_sector(bio->bi_size) > cache->args.size_limit)) {
391 generic_make_request(bio);
394 kref_get(&cache->kref);
396 state = atomic_read(&cache->state);
399 wait_for_completion(&cache->init_complete);
402 bootcache_record(cache, bio);
403 generic_make_request(bio);
406 ++cache->stats.overlapped;
409 if (is_in_cache(cache, bio))
410 bootcache_read_from_cache(cache, bio);
412 generic_make_request(bio);
415 generic_make_request(bio);
418 DMCRIT("unknown state %d", state);
422 ++cache->stats.num_requests;
/* Periodic hit-rate report every 1000 requests */
423 if (cache->stats.num_requests % 1000 == 0) {
424 DMINFO("hits = %u / %u",
425 cache->stats.num_hits,
426 cache->stats.num_requests);
428 kref_put(&cache->kref, bootcache_free_resources);
/*
 * sysfs 'valid' read handler: one ASCII character, '1' if the on-disk
 * cache was valid, '0' if a new cache must be built.
 * NOTE(review): the early return for pos>0/count==0 and the final
 * 'return 1' (bytes produced) are not visible in this excerpt.
 */
431 static ssize_t valid_read(struct file *file, struct kobject *kobp,
432 struct bin_attribute *bin_attr, char *buf,
433 loff_t pos, size_t count)
435 struct bootcache *cache = container_of(bin_attr, struct bootcache,
438 if (pos > 0 || count == 0)
440 buf[0] = cache->is_valid ? '1' : '0';
/*
 * sysfs 'free' read handler: '1' once the cache data has been freed,
 * '0' otherwise.  Mirrors valid_read.
 * NOTE(review): return statements are truncated in this excerpt.
 */
444 static ssize_t free_read(struct file *file, struct kobject *kobp,
445 struct bin_attribute *bin_attr, char *buf,
446 loff_t pos, size_t count)
448 struct bootcache *cache = container_of(bin_attr, struct bootcache,
451 if (pos > 0 || count == 0)
453 buf[0] = cache->is_free ? '1' : '0';
/*
 * sysfs 'free' write handler: switches the target to BC_BYPASS and
 * drops the initial kref so resources are released once in-flight
 * requests drain.  Idempotent via is_free under cache_lock.
 * NOTE(review): setting is_free = true and the return values are not
 * visible in this excerpt.
 */
457 static ssize_t free_write(struct file *file, struct kobject *kobp,
458 struct bin_attribute *bin_attr, char *buf,
459 loff_t pos, size_t count)
461 struct bootcache *cache = container_of(bin_attr, struct bootcache,
465 mutex_lock(&cache->cache_lock);
466 if (cache->is_free) {
470 atomic_set(&cache->state, BC_BYPASS);
472 * Once BC_BYPASS is set, the system
473 * should drain quickly.
475 kref_put(&cache->kref, bootcache_free_resources);
477 /* Tell caller we wrote everything */
480 mutex_unlock(&cache->cache_lock);
484 static ssize_t header_read(struct file *file, struct kobject *kobp,
485 struct bin_attribute *bin_attr, char *buf,
486 loff_t pos, size_t count)
488 struct bootcache *cache = container_of(bin_attr, struct bootcache,
491 return memory_read_from_buffer(buf, count, &pos, &cache->hdr,
/*
 * sysfs 'blocktrace' read handler: dump the trace records collected so
 * far.  Only valid while in BC_TRACING; holds a kref across the copy so
 * the trace buffer cannot be freed underneath us.  Snapshot of
 * trace_next is taken under trace_lock; the records up to that index
 * are stable once written.
 * NOTE(review): the non-BC_TRACING error path body is truncated in
 * this excerpt.
 */
495 static ssize_t blocktrace_read(struct file *file, struct kobject *kobp,
496 struct bin_attribute *bin_attr, char *buf,
497 loff_t pos, size_t count)
499 struct bootcache *cache = container_of(bin_attr, struct bootcache,
505 kref_get(&cache->kref);
506 if (atomic_read(&cache->state) != BC_TRACING) {
510 data = (char *)cache->trace;
512 spin_lock(&cache->trace_lock);
513 next = cache->trace_next;
514 spin_unlock(&cache->trace_lock);
516 size = next * sizeof(struct bootcache_trace);
518 err = memory_read_from_buffer(buf, count, &pos, data, size);
520 kref_put(&cache->kref, bootcache_free_resources);
/*
 * Create the four sysfs files (valid, free, header, blocktrace) for
 * this target; on any failure remove whatever was created and return
 * the error.  'valid' and 'free' are sized 3 for "0\n"/"1\n" + NUL.
 * NOTE(review): the per-call error checks between creations are
 * truncated in this excerpt.
 */
524 static int bootcache_init_sysfs(struct bootcache *cache, struct dm_target *ti)
528 rc = bootcache_create_bin_file(cache, &cache->valid, "valid",
529 3, valid_read, NULL);
532 rc = bootcache_create_bin_file(cache, &cache->free, "free",
533 3, free_read, free_write);
536 rc = bootcache_create_bin_file(cache, &cache->header, "header",
537 sizeof(cache->hdr), header_read, NULL);
540 rc = bootcache_create_bin_file(cache, &cache->blocktrace, "blocktrace",
541 cache->args.max_trace * sizeof(struct bootcache_trace),
542 blocktrace_read, NULL);
547 bootcache_remove_all_files(cache);
/*
 * end_io for the cache-fill reads issued by bootcache_read_sectors:
 * record any error in the waiter and wake the submitting thread.
 * NOTE(review): the tail of the DMERR format string (between the two
 * visible lines) is not visible in this excerpt.
 */
551 static void bootcache_read_sectors_end(struct bio *bio, int error)
553 struct bootcache_waiter *waiter = bio->bi_private;
555 if (unlikely(error)) {
556 waiter->error = error;
557 DMERR("Error occurred in bootcache_read_sectors:"
559 error, (u64)bio->bi_sector, bio->bi_size);
561 complete(&waiter->completion);
/*
 * Fill the in-memory cache: read all cached data sectors from disk
 * (located after the header and metadata at args.cache_start) into the
 * preallocated pages of the sector map, in chunks of max_io pages per
 * bio, waiting synchronously for each chunk.  On success the state
 * becomes BC_FILLED.
 * NOTE(review): large parts of this function are truncated in this
 * excerpt -- error handling after each wait, the marking of filled
 * pages (second j-loop), bio accounting fields, and the cleanup path
 * are only partially visible.  The bi_private reset to 'cache' before
 * bio_put lets bootcache_bio_destructor find the bio_set.
 */
564 static int bootcache_read_sectors(struct bootcache *cache)
566 struct bootcache_waiter waiter;
568 struct bootcache_page *p;
569 struct bootcache_page *start_page;
570 struct bio_vec *bvec;
571 sector_t sector = cache->args.cache_start + cache->hdr.sectors_meta +
573 u32 max_io = cache->max_io;
574 u32 numpages = cache->sectors.num_pages;
575 u32 chunks_to_read = (numpages + max_io - 1) / max_io;
580 p = cache->sectors.pages;
581 for (i = 0; i < chunks_to_read; i++) {
582 bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set);
583 if (unlikely(!bio)) {
584 DMERR("Out of memory bio_alloc_bioset");
587 bio->bi_private = &waiter;
588 bio->bi_destructor = bootcache_bio_destructor;
590 bio->bi_bdev = cache->dev->bdev;
591 bio->bi_end_io = bootcache_read_sectors_end;
593 bio->bi_sector = sector;
594 bvec = bio->bi_io_vec;
/* Attach up to max_io cache pages to this bio */
596 for (j = 0; j < max_io; j++, bvec++, p++) {
597 if (p == cache->sectors.nextpage)
599 bvec->bv_page = p->page;
601 bvec->bv_len = PAGE_SIZE;
603 bio->bi_size = j * PAGE_SIZE;
606 init_completion(&waiter.completion);
608 generic_make_request(bio);
609 wait_for_completion(&waiter.completion);
612 bio->bi_private = cache;
/* Second pass over the same pages -- presumably marks them filled */
617 for (j = 0; j < max_io; j++, p++) {
618 if (p == cache->sectors.nextpage)
622 sector += pages_to_sectors(j);
623 bio->bi_private = cache;
626 atomic_set(&cache->state, BC_FILLED);
630 static void bootcache_dev_read_end(struct bio *bio, int error)
632 struct bootcache_waiter *waiter = bio->bi_private;
634 if (unlikely(error)) {
635 waiter->error = error;
636 DMERR("Error occurred in bootcache_dev_read: %d (%llx, %x)",
637 error, (u64)bio->bi_sector, bio->bi_size);
639 complete(&waiter->completion);
/*
 * Synchronously read @len bytes starting at @sector from the cached
 * device into @data, in chunks of up to max_io temporary pages per
 * bio.  Used to read the on-disk header and the sector trace.
 * NOTE(review): the outer chunk loop, error checks on waiter.error,
 * alloc_page failure handling, kunmap_atomic, and the final return are
 * truncated in this excerpt.  The visible trailing lines (712-715)
 * look like the error-cleanup path that frees the temp pages.
 */
642 static int bootcache_dev_read(struct bootcache *cache, void *data,
645 struct bootcache_waiter waiter;
647 struct bio_vec *bvec;
648 int pages_to_read = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
649 int max_io = cache->max_io;
/* Don't allocate more pages than remain to be read */
659 if (pages_to_read < max_io)
660 max_io = pages_to_read;
661 bio = bio_alloc_bioset(GFP_KERNEL, max_io, cache->bio_set);
662 if (unlikely(!bio)) {
663 DMERR("Out of memory bio_alloc_bioset");
666 bvec = bio->bi_io_vec;
667 for (i = 0; i < max_io; i++, bvec++)
668 bvec->bv_page = alloc_page(GFP_KERNEL);
669 bio->bi_private = &waiter;
670 bio->bi_destructor = bootcache_bio_destructor;
672 bio->bi_bdev = cache->dev->bdev;
673 bio->bi_end_io = bootcache_dev_read_end;
675 bio->bi_sector = sector;
676 bvec = bio->bi_io_vec;
677 for (i = 0; i < max_io; i++, bvec++) {
679 bvec->bv_len = PAGE_SIZE;
681 pages_to_read -= max_io;
682 bio->bi_size = max_io * PAGE_SIZE;
683 bio->bi_vcnt = max_io;
685 init_completion(&waiter.completion);
687 generic_make_request(bio);
688 wait_for_completion(&waiter.completion);
/* Copy this chunk out of the temporary pages into the caller buffer */
693 for (i = 0; i < max_io; i++) {
694 bytes_to_copy = min(len, (int)PAGE_SIZE);
695 src = kmap_atomic(bio_iovec_idx(bio, i)->bv_page);
696 memcpy(dst, src, bytes_to_copy);
698 len -= bytes_to_copy;
701 dst += bytes_to_copy;
703 sector += pages_to_sectors(max_io);
704 bvec = bio->bi_io_vec;
705 for (i = 0; i < max_io; i++, bvec++)
706 __free_pages(bvec->bv_page, 0);
707 bio->bi_private = cache;
712 bvec = bio->bi_io_vec;
713 for (i = 0; i < max_io; i++, bvec++)
714 __free_pages(bvec->bv_page, 0);
715 bio->bi_private = cache;
/*
 * Validate an on-disk header against the header computed for this
 * boot: magic, version, queue limits, kernel build date/time, and
 * signature must all match, and the meta/data sector counts must fit
 * in the partition space past cache_start.  Returns nonzero when the
 * header is valid (see caller in bootcache_start).
 * NOTE(review): the early 'return 0' lines after each comparison and
 * the final 'return 1' are truncated in this excerpt.
 */
720 static int is_valid_hdr(struct bootcache *cache, struct bootcache_hdr *hdr)
723 u64 max_meta_sectors;
725 if (hdr->magic != BOOTCACHE_MAGIC)
727 if (hdr->version != BOOTCACHE_VERSION)
729 if (hdr->max_sectors != cache->hdr.max_sectors)
731 if (hdr->max_hw_sectors != cache->hdr.max_hw_sectors)
/* Cache is tied to the exact kernel build via __DATE__/__TIME__ */
733 if (strncmp(hdr->date, __DATE__, strlen(__DATE__) + 1) != 0)
735 if (strncmp(hdr->time, __TIME__, strlen(__TIME__) + 1) != 0)
737 if (strncmp(hdr->signature, cache->hdr.signature,
738 sizeof(hdr->signature)) != 0)
742 * Can't have any more meta sectors than it takes to map
743 * the remaining partition space for bootcache.
745 max_sectors = to_sector(i_size_read(cache->dev->bdev->bd_inode))
746 - cache->args.cache_start;
747 max_meta_sectors = to_sector(round_up(
748 sectors_to_pages(max_sectors) * sizeof(u64), SECTOR_SIZE));
749 if (hdr->sectors_meta > max_meta_sectors) {
750 DMERR("too many meta sectors %lld", (u64)hdr->sectors_meta);
753 if (hdr->sectors_data > max_sectors - hdr->sectors_meta - 1) {
754 DMERR("bootcache too big %lld", (u64)hdr->sectors_data);
/*
 * Read the on-disk sector trace (stored one page after the header)
 * and populate the sector map with one chunk per page-sized span of
 * each trace record.
 * NOTE(review): allocation-failure and read-error return paths, plus
 * the final return, are truncated in this excerpt.  Overflow of
 * size_trace for hostile num_trace_recs is presumably bounded by the
 * is_valid_hdr checks -- worth confirming.
 */
760 static int read_trace(struct bootcache *cache)
768 size_trace = sizeof(*cache->trace) * cache->hdr.num_trace_recs;
769 cache->trace = kzalloc(size_trace, GFP_KERNEL);
771 DMERR("read_trace out of memory");
774 rc = bootcache_dev_read(cache, cache->trace, size_trace,
775 cache->hdr.sector + SECTORS_PER_PAGE);
777 DMERR("bootcache_dev_read trace %d", rc);
780 for (i = 0; i < cache->hdr.num_trace_recs; i++) {
781 struct bootcache_trace *tr;
782 tr = &cache->trace[i];
783 for (j = 0; j < tr->count; j += SECTORS_PER_PAGE) {
784 bootcache_new_chunk(&cache->sectors, tr->sector + j);
794 * Reads the bootcache header from disk, checks if it is valid
796 * read the sector trace from disk
797 * build hash table for sector trace on page boundaries
798 * begin reading in sectors to be cached
800 * setup to capture trace of sectors
802 * on error: bypass the boot cache
/*
 * Deferred initialization, run from the workqueue scheduled by
 * bootcache_ctr.  Reads the on-disk header; if valid, loads the trace
 * and fills the cache (BC_FILLING -> BC_FILLED via
 * bootcache_read_sectors).  Otherwise allocates a fresh trace buffer
 * and enters BC_TRACING to record this boot.  Always completes
 * init_complete; on any error falls back to BC_BYPASS.
 * NOTE(review): error-label structure and is_valid assignment are
 * truncated in this excerpt.
 */
804 static void bootcache_start(struct work_struct *work)
806 struct bootcache *cache = container_of(work, struct bootcache,
808 struct bootcache_hdr hdr;
811 rc = bootcache_dev_read(cache, &hdr, sizeof(hdr), cache->hdr.sector);
813 DMERR("bootcache_dev_read hdr %d", rc);
816 if (is_valid_hdr(cache, &hdr)) {
818 memcpy(&cache->hdr, &hdr, sizeof(cache->hdr));
819 rc = build_sector_map(&cache->sectors,
820 sectors_to_pages(cache->hdr.sectors_data));
823 rc = read_trace(cache);
826 atomic_set(&cache->state, BC_FILLING);
827 rc = bootcache_read_sectors(cache);
/* Invalid/absent cache: record a fresh trace for this boot instead */
831 atomic_set(&cache->state, BC_TRACING);
832 cache->trace = kzalloc(sizeof(*cache->trace) *
833 cache->args.max_trace, GFP_KERNEL);
835 DMERR("cache->trace out of memory");
840 complete_all(&cache->init_complete);
/* NOTE(review): "occured" typo in this user-visible log message */
843 DMERR("error occured starting bootcache, setting to by pass mode");
844 atomic_set(&cache->state, BC_BYPASS);
850 * bootcache_max_io determines the maximum number of pages that can
851 * be passed in one read request to the underlying device.
852 * @cache: the max_sectors and max_hw_sectors must
854 * @proposed_max_io: maximum number of pages the caller wants
857 * Returns maximum number of pages that can be read but
858 * no more than proposed_max_io
/*
 * Clamp @proposed_max_io to what the device's queue limits allow:
 * pages derived from min(max_sectors, max_hw_sectors).
 * NOTE(review): the declarations of max_sectors/max_pages and the
 * final 'return max_pages' are truncated in this excerpt.
 */
860 static u32 bootcache_max_io(struct bootcache *cache, u32 proposed_max_io)
865 max_sectors = min(cache->hdr.max_sectors, cache->hdr.max_hw_sectors);
866 max_pages = sectors_to_pages(max_sectors);
867 if (proposed_max_io < max_pages)
868 max_pages = proposed_max_io;
872 static void bootcache_init_hdr(struct bootcache_hdr *hdr, u64 cache_start,
873 struct block_device *bdev, const char *signature)
875 hdr->sector = cache_start;
876 hdr->magic = BOOTCACHE_MAGIC;
877 hdr->version = BOOTCACHE_VERSION;
878 hdr->state = BC_INIT;
879 hdr->alignment = PAGE_SIZE;
880 hdr->max_hw_sectors = queue_max_hw_sectors(bdev_get_queue(bdev));
881 hdr->max_sectors = queue_max_sectors(bdev_get_queue(bdev));
882 strncpy(hdr->date, __DATE__, sizeof(hdr->date));
883 strncpy(hdr->time, __TIME__, sizeof(hdr->time));
884 strncpy(hdr->signature, signature, sizeof(hdr->signature));
888 * match_dev_by_uuid - callback for finding a partition using its uuid
889 * @dev: device passed in by the caller
890 * @uuid_data: opaque pointer to a uuid packed by part_pack_uuid().
892 * Returns 1 if the device matches, and 0 otherwise.
/*
 * class_find_device callback: match a partition by packed UUID.
 * NOTE(review): the NULL checks on part/part->info and the return
 * statements (1 on match, 0 otherwise per the comment above) are
 * truncated in this excerpt.
 */
894 static int match_dev_by_uuid(struct device *dev, void *uuid_data)
896 u8 *uuid = uuid_data;
897 struct hd_struct *part = dev_to_part(dev);
902 if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
911 * dm_get_device_by_uuid: claim a device using its UUID
912 * @ti: current dm_target
913 * @uuid_string: 36 byte UUID hex encoded
914 * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)
915 * @dev_start: offset in sectors passed to dm_get_device
916 * @dev_len: length in sectors passed to dm_get_device
917 * @dm_dev: dm_dev to populate
919 * Wraps dm_get_device allowing it to use a unique partition id
920 * to find a given partition on any drive. This code is based on
921 * printk_all_partitions in that it walks all of the registered
924 * N.B., uuid_string is not checked for safety just strlen().
/*
 * Claim a device by partition UUID (36 hex chars, optionally followed
 * by +N/-N to select a neighboring minor).  Packs the UUID, walks the
 * block class for a matching partition, converts the found dev_t to a
 * "major:minor" string and hands it to dm_get_device.
 * NOTE(review): heavily truncated in this excerpt -- the uuid buffer
 * declaration, the devt extraction from the found device, several
 * gotos between the visible error labels, and the success return are
 * missing.  The visible tail lines are the error labels (bad UUID /
 * no match / acquire failure).
 */
926 static int dm_get_device_by_uuid(struct dm_target *ti, const char *uuid_str,
927 sector_t dev_start, sector_t dev_len,
928 struct dm_dev **dm_dev)
930 struct device *dev = NULL;
932 char devt_buf[BDEVT_SIZE];
934 size_t uuid_length = strlen(uuid_str);
936 if (uuid_length < 36)
938 /* Pack the requested UUID in the expected format. */
939 part_pack_uuid(uuid_str, uuid)
941 dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
948 /* The caller may specify +/-%u after the UUID if they want a partition
949 * before or after the one identified.
951 if (uuid_length > 36) {
952 unsigned int part_offset;
954 unsigned minor = MINOR(devt);
955 if (sscanf(uuid_str + 36, "%c%u", &sign, &part_offset) == 2) {
957 minor += part_offset;
958 } else if (sign == '-') {
959 minor -= part_offset;
961 DMWARN("Trailing characters after UUID: %s\n",
964 devt = MKDEV(MAJOR(devt), minor);
968 /* Construct the dev name to pass to dm_get_device. dm_get_device
969 * doesn't support being passed a dev_t.
971 snprintf(devt_buf, sizeof(devt_buf), "%u:%u",
972 MAJOR(devt), MINOR(devt));
974 /* TODO(wad) to make this generic we could also pass in the mode. */
975 if (!dm_get_device(ti, devt_buf, dm_table_get_mode(ti->table), dm_dev))
978 ti->error = "Failed to acquire device";
979 DMDEBUG("Failed to acquire discovered device %s", devt_buf);
982 ti->error = "Bad UUID";
983 DMDEBUG("Supplied value '%s' is an invalid UUID", uuid_str);
986 DMDEBUG("No matching partition for GUID: %s", uuid_str);
987 ti->error = "No matching GUID";
/*
 * Acquire the backing device by name, falling back to UUID lookup, and
 * retrying in a loop while driver probing is still in progress (boot
 * may race device enumeration).
 * NOTE(review): the do{ opening, the sleep between retries, and the
 * parameter declarations for devname/dev_start/dev_len are truncated
 * in this excerpt.
 */
991 static int bootcache_get_device(
992 struct dm_target *ti,
996 struct dm_dev **dm_dev)
999 /* Try the normal path first since if everything is ready, it
1000 * will be the fastest.
1002 if (!dm_get_device(ti, devname,
1003 dm_table_get_mode(ti->table), dm_dev))
1006 /* Try the device by partition UUID */
1007 if (!dm_get_device_by_uuid(ti, devname, dev_start, dev_len,
1011 /* No need to be too aggressive since this is a slow path. */
1013 } while (driver_probe_done() != 0 || *dm_dev == NULL);
1014 async_synchronize_full();
1019 * bootcache_ctr - Construct a boot cache
1020 * @ti: Target being created
1021 * @argc: Number of elements in argv
1022 * @argv: Vector of arguments - All arguments are positional, this
1023 * means that to set a particular argument, all of its
1024 * predecessors must be present.
1026 * Accepts the following parameters [defaults in brackets]:
1027 * @device: Device being cached. The boot cache is also stored here.
1028 * @cache_start: Sector start on the device for the boot cache.
1029 * @signature: Signature to determine if cache is valid.
1030 * @size_limit: In sectors, max size reads to include in cache [128]
1031 * @max_trace: Number of entries in block trace made during boot [8192]
1032 * @max_pages: Maximum number of pages to cache in memory [50000]
1035 * [<dev> [<cache_start> [<sig> [<size_limit> [<max_trace> [<max_limit>]]]]]]
1038 * 0f5dbd05-c063-a848-a296-b8b8c2c24b28+1 1741200 10e8...78 80 64000 60000
/*
 * Target constructor: parse the positional arguments (see the block
 * comment above), allocate and initialize the per-target state, create
 * the sysfs files, acquire the backing device, size the bio_set, and
 * schedule bootcache_start on a workqueue for the slow initialization.
 * NOTE(review): argument parsing for argv[0] (device), the NEEDARG
 * checks for most parameters, several error-label bodies and the
 * success return are truncated in this excerpt.
 */
1040 static int bootcache_ctr(struct dm_target *ti, unsigned argc, char **argv)
1042 struct bootcache *cache = NULL;
1043 const char *signature = NULL;
1044 const char *device = NULL;
1045 u64 cache_start = 0;
1046 u64 max_pages = DEFAULT_MAX_PAGES;
1047 u64 size_limit = DEFAULT_SIZE_LIMIT;
1048 u64 max_trace = DEFAULT_MAX_TRACE;
1054 if (strict_strtoull(argv[1], 10, &cache_start)) {
1055 ti->error = "Invalid cache_start";
1059 signature = argv[2];
1061 if (strict_strtoull(argv[3], 10, &size_limit)) {
1062 ti->error = "Invalid size_limit";
1066 if (strict_strtoull(argv[4], 10, &max_trace)) {
1067 ti->error = "Invalid max_trace";
1071 if (strict_strtoull(argv[5], 10, &max_pages)) {
1072 ti->error = "Invalid max_pages";
1076 #define NEEDARG(n) \
1078 ti->error = "Missing argument: " #n; \
1084 NEEDARG(cache_start);
/* The cache never writes through this target; enforce read-only */
1087 if ((dm_table_get_mode(ti->table) & DEV_MODE) != DEV_MODE) {
1088 ti->error = "Must be created read only.";
1092 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
1095 init_completion(&cache->init_complete);
1098 strlcpy(cache->args.device, device, sizeof(cache->args.device));
1099 strlcpy(cache->args.signature, signature,
1100 sizeof(cache->args.signature));
1101 cache->args.cache_start = cache_start;
1102 cache->args.max_pages = max_pages;
1103 cache->args.size_limit = size_limit;
/* Clamp max_trace so the sysfs blocktrace file size stays bounded */
1104 if (max_trace > MAX_TRACE) {
1105 DMWARN("max_trace too large %llu, setting to %d\n",
1106 max_trace, MAX_TRACE);
1107 max_trace = MAX_TRACE;
1109 cache->args.max_trace = max_trace;
1111 cache->begin = ti->begin;
1112 cache->len = ti->len;
1114 atomic_set(&cache->state, BC_INIT);
1115 kref_init(&cache->kref);
1116 mutex_init(&cache->cache_lock);
1117 spin_lock_init(&cache->trace_lock);
1119 /* For the name, use the device default with / changed to _ */
1120 cache->name = dm_disk(dm_table_get_md(ti->table))->disk_name;
1122 if (bootcache_init_sysfs(cache, ti))
1125 rc = bootcache_get_device(ti, device,
1126 ti->begin, ti->len, &cache->dev);
1128 DMERR("Failed to acquire device '%s': %d", device, rc);
1129 ti->error = "Device lookup failed";
1133 bootcache_init_hdr(&cache->hdr, cache_start,
1134 cache->dev->bdev, signature);
1135 cache->max_io = bootcache_max_io(cache, BIO_MAX_PAGES);
1137 /* Allocate the bioset used for request padding */
1138 cache->bio_set = bioset_create(cache->max_io * 4, 0);
1139 if (!cache->bio_set) {
1140 ti->error = "Cannot allocate verity bioset";
1144 ti->num_flush_requests = 1;
1145 ti->private = cache;
1148 char vdev[BDEVNAME_SIZE];
1149 bdevname(cache->dev->bdev, vdev);
1150 DMINFO("dev:%s", vdev);
/* Slow init (disk reads) happens off this thread in bootcache_start */
1152 INIT_WORK(&cache->work.work, bootcache_start);
1153 schedule_work(&cache->work.work);
1155 DMINFO("cache:%p", cache);
1159 dm_put_device(ti, cache->dev);
1161 bootcache_remove_all_files(cache);
1163 kfree(cache); /* hash is not secret so no need to zero */
/*
 * Report status: INFO emits the request/hit/overlap counters, TABLE
 * emits the backing device and the constructor arguments.
 * NOTE(review): the DMEMIT for the INFO case, the break statements,
 * and the return are truncated in this excerpt.
 */
1168 static int bootcache_status(struct dm_target *ti, status_type_t type,
1169 char *result, uint maxlen)
1171 struct bootcache *cache = (struct bootcache *) ti->private;
1173 char vdev[BDEVNAME_SIZE];
1176 case STATUSTYPE_INFO:
1178 cache->stats.num_requests,
1179 cache->stats.num_hits,
1180 cache->stats.overlapped);
1183 case STATUSTYPE_TABLE:
1184 bdevname(cache->dev->bdev, vdev);
1185 DMEMIT("/dev/%s signature=%s cache_start=%llu max_pages=%llu"
1186 " size_limit=%llu max_trace=%llu\n",
1188 cache->args.signature,
1189 cache->args.cache_start,
1190 cache->args.max_pages,
1191 cache->args.size_limit,
1192 cache->args.max_trace);
/*
 * Target destructor: release the bio_set and backing device.
 * NOTE(review): the final kfree(cache) implied by "Destroying config"
 * is truncated in this excerpt.
 */
1198 static void bootcache_dtr(struct dm_target *ti)
1201 * Doesn't have to clean-up the meta files in sysfs
1202 * because the device mapper has already done it.
1204 struct bootcache *cache = (struct bootcache *)ti->private;
1206 DMDEBUG("Destroying bio set");
1207 bioset_free(cache->bio_set);
1209 DMDEBUG("Putting dev");
1210 dm_put_device(ti, cache->dev);
1212 DMDEBUG("Destroying config");
1216 static int bootcache_map(struct dm_target *ti, struct bio *bio,
1217 union map_info *map_context)
1219 bootcache_read(ti->private, bio);
1220 return DM_MAPIO_SUBMITTED;
1223 static int bootcache_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
1224 struct bio_vec *biovec, int max_size)
1226 struct bootcache *cache = ti->private;
1227 struct request_queue *q = bdev_get_queue(cache->dev->bdev);
1229 if (!q->merge_bvec_fn)
1232 bvm->bi_bdev = cache->dev->bdev;
1233 bvm->bi_sector = cache->begin +
1234 bvm->bi_sector - ti->begin;
1236 /* Optionally, this could just return 0 to stick to single pages. */
1237 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
1240 static int bootcache_iterate_devices(struct dm_target *ti,
1241 iterate_devices_callout_fn fn, void *data)
1243 struct bootcache *cache = ti->private;
1245 return fn(ti, cache->dev, cache->begin, ti->len, data);
1248 static void bootcache_io_hints(struct dm_target *ti,
1249 struct queue_limits *limits)
1251 limits->logical_block_size = PAGE_SIZE;
1252 limits->physical_block_size = PAGE_SIZE;
1253 blk_limits_io_min(limits, PAGE_SIZE);
1256 static struct target_type bootcache_target = {
1257 .name = "bootcache",
1258 .version = {0, 1, 0},
1259 .module = THIS_MODULE,
1260 .ctr = bootcache_ctr,
1261 .dtr = bootcache_dtr,
1262 .map = bootcache_map,
1263 .merge = bootcache_merge,
1264 .status = bootcache_status,
1265 .iterate_devices = bootcache_iterate_devices,
1266 .io_hints = bootcache_io_hints,
/*
 * Module init: register the bootcache target and log the version.
 * NOTE(review): the 'int rc' declaration, the rc < 0 test, and the
 * return statements around the register_failed label are truncated in
 * this excerpt.
 */
1269 static int __init dm_bootcache_init(void)
1273 rc = dm_register_target(&bootcache_target);
1275 DMERR("register failed %d", rc);
1276 goto register_failed;
1279 DMINFO("version %u.%u.%u loaded", bootcache_target.version[0],
1280 bootcache_target.version[1], bootcache_target.version[2]);
1288 static void __exit dm_bootcache_exit(void)
1290 dm_unregister_target(&bootcache_target);
/* Module entry/exit hookup and metadata */
1293 module_init(dm_bootcache_init);
1294 module_exit(dm_bootcache_exit);
1296 MODULE_AUTHOR("Paul Taysom <taysom@chromium.org>");
1297 MODULE_DESCRIPTION(DM_NAME "read cache");
1298 MODULE_LICENSE("GPL");