From: Linus Torvalds
Date: Mon, 10 Oct 2016 00:16:18 +0000 (-0700)
Subject: Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device...
X-Git-Tag: v4.9-rc1~70
X-Git-Url: http://git.cascardo.eti.br/?a=commitdiff_plain;h=48915c2cbc77eceec2005afb695ac658fede4e0d;hp=-c;p=cascardo%2Flinux.git

Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - various fixes and cleanups for request-based DM core

 - add support for delaying the requeue of requests; used by DM
   multipath when all paths have failed and 'queue_if_no_path' is
   enabled

 - DM cache improvements to speed up the loading of metadata and the
   writing of the hint array

 - fix potential for a dm-crypt crash on device teardown

 - remove dm_bufio_cond_resched() and just use cond_resched()

 - change DM multipath to return a reservation conflict error
   immediately, rather than failing the path and retrying (potentially
   indefinitely)

* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm mpath: always return reservation conflict without failing over
  dm bufio: remove dm_bufio_cond_resched()
  dm crypt: fix crash on exit
  dm cache metadata: switch to using the new cursor api for loading metadata
  dm array: introduce cursor api
  dm btree: introduce cursor api
  dm cache policy smq: distribute entries to random levels when switching to smq
  dm cache: speed up writing of the hint array
  dm array: add dm_array_new()
  dm mpath: delay the requeue of blk-mq requests while all paths down
  dm mpath: use dm_mq_kick_requeue_list()
  dm rq: introduce dm_mq_kick_requeue_list()
  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  dm: convert wait loops to use autoremove_wake_function()
  dm: use signal_pending_state() in dm_wait_for_completion()
  dm: rename task state function arguments
  dm: add two lockdep_assert_held() statements
  dm rq: simplify dm_old_stop_queue()
  dm mpath: check if path's request_queue is dying in activate_path()
  ...
---

48915c2cbc77eceec2005afb695ac658fede4e0d
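The first file below is drivers/md/dm-bufio.c, where the driver-private
dm_bufio_cond_resched() wrapper (an open-coded need_resched() check around
_cond_resched()) is dropped in favor of a plain cond_resched(), which already
performs that check. A minimal sketch of the resulting pattern; struct
my_buffer and scan_buffers() are hypothetical names, not code from this
series:

	#include <linux/sched.h>
	#include <linux/list.h>

	struct my_buffer {			/* hypothetical item type */
		struct list_head lru_list;
	};

	static void scan_buffers(struct list_head *lru)
	{
		struct my_buffer *b;

		list_for_each_entry(b, lru, lru_list) {
			/* ... inspect or write back b ... */
			cond_resched();	/* yields only if need_resched() */
		}
	}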
diff --combined drivers/md/dm-bufio.c
index 8625040bae92,27f4a802f83c..125aedc3875f
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@@ -191,19 -191,6 +191,6 @@@ static void dm_bufio_unlock(struct dm_b
  	mutex_unlock(&c->lock);
  }
  
- /*
-  * FIXME Move to sched.h?
-  */
- #ifdef CONFIG_PREEMPT_VOLUNTARY
- #  define dm_bufio_cond_resched()		\
- do {						\
- 	if (unlikely(need_resched()))		\
- 		_cond_resched();		\
- } while (0)
- #else
- #  define dm_bufio_cond_resched()	do { } while (0)
- #endif
- 
  /*----------------------------------------------------------------*/
  
  /*
@@@ -741,7 -728,7 +728,7 @@@ static void __flush_write_list(struct l
  			list_entry(write_list->next, struct dm_buffer, write_list);
  		list_del(&b->write_list);
  		submit_io(b, WRITE, b->block, write_endio);
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  	blk_finish_plug(&plug);
  }
@@@ -780,7 -767,7 +767,7 @@@ static struct dm_buffer *__get_unclaime
  			__unlink_buffer(b);
  			return b;
  		}
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  
  	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@@ -791,7 -778,7 +778,7 @@@
  			__unlink_buffer(b);
  			return b;
  		}
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  
  	return NULL;
@@@ -923,7 -910,7 +910,7 @@@ static void __write_dirty_buffers_async
  			return;
  
  		__write_dirty_buffer(b, write_list);
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  }
  
@@@ -973,7 -960,7 +960,7 @@@ static void __check_watermark(struct dm
  			return;
  
  		__free_buffer_wake(b);
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  
  	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
@@@ -1170,7 -1157,7 +1157,7 @@@ void dm_bufio_prefetch(struct dm_bufio_
  			submit_io(b, READ, b->block, read_endio);
  			dm_bufio_release(b);
  
- 			dm_bufio_cond_resched();
+ 			cond_resched();
  
  			if (!n_blocks)
  				goto flush_plug;
@@@ -1291,7 -1278,7 +1278,7 @@@ again
  		    !test_bit(B_WRITING, &b->state))
  			__relink_lru(b, LIST_CLEAN);
  
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  
  		/*
  		 * If we dropped the lock, the list is no longer consistent,
@@@ -1574,7 -1561,7 +1561,7 @@@ static unsigned long __scan(struct dm_b
  			freed++;
  			if (!--nr_to_scan || ((count - freed) <= retain_target))
  				return freed;
- 			dm_bufio_cond_resched();
+ 			cond_resched();
  		}
  	}
  	return freed;
@@@ -1808,7 -1795,7 +1795,7 @@@ static void __evict_old_buffers(struct 
  		if (__try_evict_buffer(b, 0))
  			count--;
  
- 		dm_bufio_cond_resched();
+ 		cond_resched();
  	}
  
  	dm_bufio_unlock(c);
@@@ -1879,7 -1866,7 +1866,7 @@@ static int __init dm_bufio_init(void
  	__cache_size_refresh();
  	mutex_unlock(&dm_bufio_clients_lock);
  
- 	dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
+ 	dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
  	if (!dm_bufio_wq)
  		return -ENOMEM;
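The final dm-bufio hunk above also switches the init path from
create_singlethread_workqueue() to alloc_workqueue() with WQ_MEM_RECLAIM,
which attaches a rescuer thread so queued work keeps making forward progress
under memory pressure. A usage sketch with a hypothetical module (my_wq,
my_init and my_exit are illustrative names only, not from this series):

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *my_wq;	/* hypothetical */

	static int __init my_init(void)
	{
		/* WQ_MEM_RECLAIM: safe to use on the writeback path */
		my_wq = alloc_workqueue("my_wq", WQ_MEM_RECLAIM, 0);
		if (!my_wq)
			return -ENOMEM;
		return 0;
	}

	static void __exit my_exit(void)
	{
		destroy_workqueue(my_wq);
	}

	module_init(my_init);
	module_exit(my_exit);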
diff --combined drivers/md/dm-crypt.c
index 0448e7e35c8c,bcba462a7d14..a2768835d394
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@@ -113,8 -113,7 +113,7 @@@ struct iv_tcw_private 
   * and encrypts / decrypts at the same time.
   */
  enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- 	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
- 	     DM_CRYPT_EXIT_THREAD};
+ 	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
  
  /*
   * The fields in here must be read only after initialization.
@@@ -1207,18 -1206,20 +1206,20 @@@ continue_locked
  		if (!RB_EMPTY_ROOT(&cc->write_tree))
  			goto pop_from_list;
  
- 		if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
- 			spin_unlock_irq(&cc->write_thread_wait.lock);
- 			break;
- 		}
- 
- 		__set_current_state(TASK_INTERRUPTIBLE);
+ 		set_current_state(TASK_INTERRUPTIBLE);
  		__add_wait_queue(&cc->write_thread_wait, &wait);
  		spin_unlock_irq(&cc->write_thread_wait.lock);
  
+ 		if (unlikely(kthread_should_stop())) {
+ 			set_task_state(current, TASK_RUNNING);
+ 			remove_wait_queue(&cc->write_thread_wait, &wait);
+ 			break;
+ 		}
+ 
  		schedule();
  
+ 		set_task_state(current, TASK_RUNNING);
  		spin_lock_irq(&cc->write_thread_wait.lock);
  		__remove_wait_queue(&cc->write_thread_wait, &wait);
  		goto continue_locked;
@@@ -1453,7 -1454,7 +1454,7 @@@ static int crypt_alloc_tfms(struct cryp
  	unsigned i;
  	int err;
  
- 	cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
+ 	cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
  			   GFP_KERNEL);
  	if (!cc->tfms)
  		return -ENOMEM;
@@@ -1533,13 -1534,8 +1534,8 @@@ static void crypt_dtr(struct dm_target 
  	if (!cc)
  		return;
  
- 	if (cc->write_thread) {
- 		spin_lock_irq(&cc->write_thread_wait.lock);
- 		set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
- 		wake_up_locked(&cc->write_thread_wait);
- 		spin_unlock_irq(&cc->write_thread_wait.lock);
+ 	if (cc->write_thread)
  		kthread_stop(cc->write_thread);
- 	}
  
  	if (cc->io_queue)
  		destroy_workqueue(cc->io_queue);
@@@ -1924,13 -1920,6 +1920,13 @@@ static int crypt_map(struct dm_target *
  		return DM_MAPIO_REMAPPED;
  	}
  
 +	/*
 +	 * Check if bio is too large, split as needed.
 +	 */
 +	if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
 +	    bio_data_dir(bio) == WRITE)
 +		dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
 +
  	io = dm_per_bio_data(bio, cc->per_bio_data_size);
  	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
  	io->ctx.req = (struct skcipher_request *)(io + 1);
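The dm-crypt hunks above are the "fix crash on exit" change: the private
DM_CRYPT_EXIT_THREAD flag plus wake_up_locked() scheme is replaced by
kthread_stop(), which sets the should-stop flag and then wakes the thread.
The pattern is only race-free if the worker checks kthread_should_stop()
after setting its task state, as the new code does. A self-contained sketch
of that idiom; my_worker() is hypothetical, not the dm-crypt code:

	#include <linux/kthread.h>
	#include <linux/sched.h>

	static int my_worker(void *data)
	{
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);

			/*
			 * Must be checked *after* setting the task state:
			 * kthread_stop() sets the flag and then wakes us,
			 * so either we see the flag here or the wakeup
			 * turns the schedule() below into a no-op.
			 */
			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				break;
			}

			schedule();
			__set_current_state(TASK_RUNNING);

			/* ... drain whatever work is pending ... */
		}
		return 0;
	}

Such a thread would be started with kthread_run() and torn down with
kthread_stop(), which does not return until the thread function exits.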
diff --combined drivers/md/dm-rq.c
index ee48230a2952,877b8f33620e..182b67947dad
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@@ -73,43 -73,60 +73,60 @@@ static void dm_old_start_queue(struct r
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
+ static void dm_mq_start_queue(struct request_queue *q)
+ {
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(q->queue_lock, flags);
+ 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ 	spin_unlock_irqrestore(q->queue_lock, flags);
+ 
+ 	blk_mq_start_stopped_hw_queues(q, true);
+ 	blk_mq_kick_requeue_list(q);
+ }
+ 
  void dm_start_queue(struct request_queue *q)
  {
  	if (!q->mq_ops)
  		dm_old_start_queue(q);
- 	else {
- 		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
- 		blk_mq_start_stopped_hw_queues(q, true);
- 		blk_mq_kick_requeue_list(q);
- 	}
+ 	else
+ 		dm_mq_start_queue(q);
  }
  
  static void dm_old_stop_queue(struct request_queue *q)
  {
  	unsigned long flags;
  
+ 	spin_lock_irqsave(q->queue_lock, flags);
+ 	if (!blk_queue_stopped(q))
+ 		blk_stop_queue(q);
+ 	spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+ 
+ static void dm_mq_stop_queue(struct request_queue *q)
+ {
+ 	unsigned long flags;
+ 
  	spin_lock_irqsave(q->queue_lock, flags);
  	if (blk_queue_stopped(q)) {
  		spin_unlock_irqrestore(q->queue_lock, flags);
  		return;
  	}
  
- 	blk_stop_queue(q);
+ 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
  	spin_unlock_irqrestore(q->queue_lock, flags);
+ 
+ 	/* Avoid that requeuing could restart the queue. */
+ 	blk_mq_cancel_requeue_work(q);
+ 	blk_mq_stop_hw_queues(q);
  }
  
  void dm_stop_queue(struct request_queue *q)
  {
  	if (!q->mq_ops)
  		dm_old_stop_queue(q);
- 	else {
- 		spin_lock_irq(q->queue_lock);
- 		queue_flag_set(QUEUE_FLAG_STOPPED, q);
- 		spin_unlock_irq(q->queue_lock);
- 
- 		blk_mq_cancel_requeue_work(q);
- 		blk_mq_stop_hw_queues(q);
- 	}
+ 	else
+ 		dm_mq_stop_queue(q);
  }
  
  static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
@@@ -319,21 -336,32 +336,32 @@@ static void dm_old_requeue_request(stru
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
- static void dm_mq_requeue_request(struct request *rq)
+ static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
  {
- 	struct request_queue *q = rq->q;
  	unsigned long flags;
  
- 	blk_mq_requeue_request(rq);
  	spin_lock_irqsave(q->queue_lock, flags);
  	if (!blk_queue_stopped(q))
- 		blk_mq_kick_requeue_list(q);
+ 		blk_mq_delay_kick_requeue_list(q, msecs);
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
- static void dm_requeue_original_request(struct mapped_device *md,
- 					struct request *rq)
+ void dm_mq_kick_requeue_list(struct mapped_device *md)
+ {
+ 	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+ }
+ EXPORT_SYMBOL(dm_mq_kick_requeue_list);
+ 
+ static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+ {
+ 	blk_mq_requeue_request(rq);
+ 	__dm_mq_kick_requeue_list(rq->q, msecs);
+ }
+ 
+ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
  {
+ 	struct mapped_device *md = tio->md;
+ 	struct request *rq = tio->orig;
  	int rw = rq_data_dir(rq);
  
  	rq_end_stats(md, rq);
@@@ -342,7 -370,7 +370,7 @@@
  	if (!rq->q->mq_ops)
  		dm_old_requeue_request(rq);
  	else
- 		dm_mq_requeue_request(rq);
+ 		dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
  
  	rq_completed(md, rw, false);
  }
@@@ -372,7 -400,7 +400,7 @@@ static void dm_done(struct request *clo
  		return;
  	else if (r == DM_ENDIO_REQUEUE)
  		/* The target wants to requeue the I/O */
- 		dm_requeue_original_request(tio->md, tio->orig);
+ 		dm_requeue_original_request(tio, false);
  	else {
  		DMWARN("unimplemented target endio return value: %d", r);
  		BUG();
@@@ -612,20 -640,23 +640,23 @@@ static int dm_old_prep_fn(struct reques
  
  /*
   * Returns:
-  * 0                : the request has been processed
-  * DM_MAPIO_REQUEUE : the original request needs to be requeued
+  * DM_MAPIO_*       : the request has been processed as indicated
+  * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
   * < 0              : the request was completed due to failure
   */
- static int map_request(struct dm_rq_target_io *tio, struct request *rq,
- 		       struct mapped_device *md)
+ static int map_request(struct dm_rq_target_io *tio)
  {
  	int r;
  	struct dm_target *ti = tio->ti;
+ 	struct mapped_device *md = tio->md;
+ 	struct request *rq = tio->orig;
  	struct request *clone = NULL;
  
  	if (tio->clone) {
  		clone = tio->clone;
  		r = ti->type->map_rq(ti, clone, &tio->info);
+ 		if (r == DM_MAPIO_DELAY_REQUEUE)
+ 			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
  	} else {
  		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
  		if (r < 0) {
@@@ -633,9 -664,8 +664,8 @@@
  			dm_kill_unmapped_request(rq, r);
  			return r;
  		}
- 		if (r != DM_MAPIO_REMAPPED)
- 			return r;
- 		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+ 		if (r == DM_MAPIO_REMAPPED &&
+ 		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
  			/* -ENOMEM */
  			ti->type->release_clone_rq(clone);
  			return DM_MAPIO_REQUEUE;
@@@ -654,7 -684,10 +684,10 @@@
  		break;
  	case DM_MAPIO_REQUEUE:
  		/* The target wants to requeue the I/O */
- 		dm_requeue_original_request(md, tio->orig);
+ 		break;
+ 	case DM_MAPIO_DELAY_REQUEUE:
+ 		/* The target wants to requeue the I/O after a delay */
+ 		dm_requeue_original_request(tio, true);
  		break;
  	default:
  		if (r > 0) {
@@@ -664,10 -697,9 +697,9 @@@
  
  		/* The target wants to complete the I/O */
  		dm_kill_unmapped_request(rq, r);
- 		return r;
  	}
  
- 	return 0;
+ 	return r;
  }
  
  static void dm_start_request(struct mapped_device *md, struct request *orig)
@@@ -706,11 -738,9 +738,9 @@@
  static void map_tio_request(struct kthread_work *work)
  {
  	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
- 	struct request *rq = tio->orig;
- 	struct mapped_device *md = tio->md;
  
- 	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- 		dm_requeue_original_request(md, rq);
+ 	if (map_request(tio) == DM_MAPIO_REQUEUE)
+ 		dm_requeue_original_request(tio, false);
  }
  
  ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
@@@ -896,7 -926,7 +926,7 @@@ static int dm_mq_queue_rq(struct blk_mq
  	tio->ti = ti;
  
  	/* Direct call is fine since .queue_rq allows allocations */
- 	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+ 	if (map_request(tio) == DM_MAPIO_REQUEUE) {
  		/* Undo dm_start_request() before requeuing */
  		rq_end_stats(md, rq);
  		rq_completed(md, rq_data_dir(rq), false);
@@@ -955,7 -985,7 +985,7 @@@ int dm_mq_init_request_queue(struct map
  	dm_init_md_queue(md);
  
  	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
- 	blk_mq_register_disk(md->disk);
+ 	blk_mq_register_dev(disk_to_dev(md->disk), q);
  
  	return 0;
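For context, map_request() above now distinguishes DM_MAPIO_REQUEUE (requeue
immediately) from the new DM_MAPIO_DELAY_REQUEUE (requeue after a delay; for
blk-mq, dm_requeue_original_request() uses 5000 ms). A hedged sketch of a
4.9-era .map_rq hook returning the new code; struct my_target, its
all_paths_down field and my_map_rq() are hypothetical, with dm-mpath being
the real user in this series:

	#include <linux/blkdev.h>
	#include <linux/device-mapper.h>

	/* Hypothetical per-target state, for illustration only */
	struct my_target {
		bool all_paths_down;
	};

	static int my_map_rq(struct dm_target *ti, struct request *clone,
			     union map_info *map_context)
	{
		struct my_target *mt = ti->private;

		if (mt->all_paths_down)
			/*
			 * Ask the core for a delayed requeue; for blk-mq
			 * this reaches dm_mq_delay_requeue_request() with
			 * a 5000 ms delay instead of busy-requeueing.
			 */
			return DM_MAPIO_DELAY_REQUEUE;

		/* ... remap the clone to the chosen path's device here ... */
		return DM_MAPIO_REMAPPED;
	}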