Merge branch 'for-linus' of git://git.kernel.dk/linux-block
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 13 Sep 2014 16:39:55 +0000 (09:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 13 Sep 2014 16:39:55 +0000 (09:39 -0700)
Pull block fixes from Jens Axboe:
 "A small collection of fixes for the current rc series.  This contains:

   - Two small blk-mq patches from Rob Elliott, cleaning up error cases
     at init time.

   - A fix from Ming Lei, fixing SG merging for blk-mq where
     QUEUE_FLAG_NO_SG_MERGE is the default.

   - A dev_t minor lifetime fix from Keith, fixing an issue where a
     minor might be reused before all references to it were gone.

   - Fix from Alan Stern where an unbalanced queue bypass caused SCSI
     some headaches when it does a series of add/del operations on
     devices without fully registering the queue.

   - A fix from me that scales down the tag depth in blk-mq if we are
     short on memory"
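
The last item is the only one with behaviour visible to drivers: blk_mq_alloc_tag_set() may now succeed with a smaller queue depth than requested, so callers should read set->queue_depth back rather than assume the value they asked for.  A minimal sketch of the caller side (hypothetical helper, not part of this merge):

/*
 * Hypothetical driver fragment, not from this merge: with the depth
 * scaling in blk_mq_alloc_rq_maps() below, blk_mq_alloc_tag_set() may
 * lower set->queue_depth if memory is tight, so read the field back
 * instead of assuming the requested depth was granted.
 */
static int example_alloc_tags(struct blk_mq_tag_set *set, unsigned int want)
{
        int ret;

        set->queue_depth = want;
        ret = blk_mq_alloc_tag_set(set);
        if (ret)
                return ret;

        if (set->queue_depth < want)
                pr_info("example: tag depth reduced from %u to %u\n",
                        want, set->queue_depth);

        return 0;
}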

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: scale depth and rq map appropriate if low on memory
  Block: fix unbalanced bypass-disable in blk_register_queue
  block: Fix dev_t minor allocation lifetime
  blk-mq: cleanup after blk_mq_init_rq_map failures
  blk-mq: pass along blk_mq_alloc_tag_set return values
  blk-merge: fix blk_recount_segments

block/blk-merge.c
block/blk-mq.c
block/blk-sysfs.c
block/genhd.c
block/partition-generic.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/null_blk.c
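
Note on the block/genhd.c change below: the dev_t lifetime fix also converts ext_devt_mutex to a spinlock, so the IDR allocation can no longer sleep while the lock is held and is switched to the standard idr_preload() pattern.  In generic form (illustrative helper, not part of this merge):

/*
 * Illustrative helper, not from this merge: preallocate IDR memory with
 * a sleeping allocation outside the lock, then do the actual allocation
 * atomically (GFP_NOWAIT) while holding the spinlock.
 */
static int example_idr_alloc(struct idr *idr, spinlock_t *lock, void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);        /* may sleep, fills per-CPU cache */

        spin_lock(lock);
        id = idr_alloc(idr, ptr, 0, 0, GFP_NOWAIT);
        spin_unlock(lock);

        idr_preload_end();

        return id;                      /* >= 0 on success, -errno on failure */
}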

block/blk-merge.c
index 5453583..7788179 100644
 #include "blk.h"
 
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
-                                            struct bio *bio)
+                                            struct bio *bio,
+                                            bool no_sg_merge)
 {
        struct bio_vec bv, bvprv = { NULL };
-       int cluster, high, highprv = 1, no_sg_merge;
+       int cluster, high, highprv = 1;
        unsigned int seg_size, nr_phys_segs;
        struct bio *fbio, *bbio;
        struct bvec_iter iter;
@@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
        cluster = blk_queue_cluster(q);
        seg_size = 0;
        nr_phys_segs = 0;
-       no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
        high = 0;
        for_each_bio(bio) {
                bio_for_each_segment(bv, bio, iter) {
@@ -88,18 +88,23 @@ new_segment:
 
 void blk_recalc_rq_segments(struct request *rq)
 {
-       rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+       bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
+                       &rq->q->queue_flags);
+
+       rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
+                       no_sg_merge);
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-       if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+       if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
+                       bio->bi_vcnt < queue_max_segments(q))
                bio->bi_phys_segments = bio->bi_vcnt;
        else {
                struct bio *nxt = bio->bi_next;
 
                bio->bi_next = NULL;
-               bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+               bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
                bio->bi_next = nxt;
        }
 
block/blk-mq.c
index 4aac826..383ea0c 100644
@@ -1321,6 +1321,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
                                continue;
                        set->ops->exit_request(set->driver_data, tags->rqs[i],
                                                hctx_idx, i);
+                       tags->rqs[i] = NULL;
                }
        }
 
@@ -1354,8 +1355,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
        INIT_LIST_HEAD(&tags->page_list);
 
-       tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
-                                       GFP_KERNEL, set->numa_node);
+       tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                                set->numa_node);
        if (!tags->rqs) {
                blk_mq_free_tags(tags);
                return NULL;
@@ -1379,8 +1381,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                        this_order--;
 
                do {
-                       page = alloc_pages_node(set->numa_node, GFP_KERNEL,
-                                               this_order);
+                       page = alloc_pages_node(set->numa_node,
+                               GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                               this_order);
                        if (page)
                                break;
                        if (!this_order--)
@@ -1404,8 +1407,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                        if (set->ops->init_request) {
                                if (set->ops->init_request(set->driver_data,
                                                tags->rqs[i], hctx_idx, i,
-                                               set->numa_node))
+                                               set->numa_node)) {
+                                       tags->rqs[i] = NULL;
                                        goto fail;
+                               }
                        }
 
                        p += rq_size;
@@ -1416,7 +1421,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
        return tags;
 
 fail:
-       pr_warn("%s: failed to allocate requests\n", __func__);
        blk_mq_free_rq_map(set, tags, hctx_idx);
        return NULL;
 }
@@ -1936,6 +1940,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+       int i;
+
+       for (i = 0; i < set->nr_hw_queues; i++) {
+               set->tags[i] = blk_mq_init_rq_map(set, i);
+               if (!set->tags[i])
+                       goto out_unwind;
+       }
+
+       return 0;
+
+out_unwind:
+       while (--i >= 0)
+               blk_mq_free_rq_map(set, set->tags[i], i);
+
+       set->tags = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+       unsigned int depth;
+       int err;
+
+       depth = set->queue_depth;
+       do {
+               err = __blk_mq_alloc_rq_maps(set);
+               if (!err)
+                       break;
+
+               set->queue_depth >>= 1;
+               if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+                       err = -ENOMEM;
+                       break;
+               }
+       } while (set->queue_depth);
+
+       if (!set->queue_depth || err) {
+               pr_err("blk-mq: failed to allocate request map\n");
+               return -ENOMEM;
+       }
+
+       if (depth != set->queue_depth)
+               pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+                                               depth, set->queue_depth);
+
+       return 0;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -1944,8 +2003,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
  */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
-       int i;
-
        if (!set->nr_hw_queues)
                return -EINVAL;
        if (!set->queue_depth)
@@ -1966,23 +2023,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                                 sizeof(struct blk_mq_tags *),
                                 GFP_KERNEL, set->numa_node);
        if (!set->tags)
-               goto out;
+               return -ENOMEM;
 
-       for (i = 0; i < set->nr_hw_queues; i++) {
-               set->tags[i] = blk_mq_init_rq_map(set, i);
-               if (!set->tags[i])
-                       goto out_unwind;
-       }
+       if (blk_mq_alloc_rq_maps(set))
+               goto enomem;
 
        mutex_init(&set->tag_list_lock);
        INIT_LIST_HEAD(&set->tag_list);
 
        return 0;
-
-out_unwind:
-       while (--i >= 0)
-               blk_mq_free_rq_map(set, set->tags[i], i);
-out:
+enomem:
+       kfree(set->tags);
+       set->tags = NULL;
        return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -1997,6 +2049,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
        }
 
        kfree(set->tags);
+       set->tags = NULL;
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
block/blk-sysfs.c
index 4db5abf..17f5c84 100644
@@ -554,8 +554,10 @@ int blk_register_queue(struct gendisk *disk)
         * Initialization must be complete by now.  Finish the initial
         * bypass from queue allocation.
         */
-       queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
-       blk_queue_bypass_end(q);
+       if (!blk_queue_init_done(q)) {
+               queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+               blk_queue_bypass_end(q);
+       }
 
        ret = blk_trace_init_sysfs(dev);
        if (ret)
block/genhd.c
index 791f419..09da5e4 100644
@@ -28,10 +28,10 @@ struct kobject *block_depr;
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT            (1 << MINORBITS)
 
-/* For extended devt allocation.  ext_devt_mutex prevents look up
+/* For extended devt allocation.  ext_devt_lock prevents look up
  * results from going away underneath its user.
  */
-static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
 static struct device_type disk_type;
@@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
        }
 
        /* allocate ext devt */
-       mutex_lock(&ext_devt_mutex);
-       idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
-       mutex_unlock(&ext_devt_mutex);
+       idr_preload(GFP_KERNEL);
+
+       spin_lock(&ext_devt_lock);
+       idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
+       spin_unlock(&ext_devt_lock);
+
+       idr_preload_end();
        if (idx < 0)
                return idx == -ENOSPC ? -EBUSY : idx;
 
@@ -447,9 +451,9 @@ void blk_free_devt(dev_t devt)
                return;
 
        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
-               mutex_lock(&ext_devt_mutex);
+               spin_lock(&ext_devt_lock);
                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-               mutex_unlock(&ext_devt_mutex);
+               spin_unlock(&ext_devt_lock);
        }
 }
 
@@ -665,7 +669,6 @@ void del_gendisk(struct gendisk *disk)
                sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
        pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
        device_del(disk_to_dev(disk));
-       blk_free_devt(disk_to_dev(disk)->devt);
 }
 EXPORT_SYMBOL(del_gendisk);
 
@@ -690,13 +693,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
        } else {
                struct hd_struct *part;
 
-               mutex_lock(&ext_devt_mutex);
+               spin_lock(&ext_devt_lock);
                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
                if (part && get_disk(part_to_disk(part))) {
                        *partno = part->partno;
                        disk = part_to_disk(part);
                }
-               mutex_unlock(&ext_devt_mutex);
+               spin_unlock(&ext_devt_lock);
        }
 
        return disk;
@@ -1098,6 +1101,7 @@ static void disk_release(struct device *dev)
 {
        struct gendisk *disk = dev_to_disk(dev);
 
+       blk_free_devt(dev->devt);
        disk_release_events(disk);
        kfree(disk->random);
        disk_replace_part_tbl(disk, NULL);
block/partition-generic.c
index 789cdea..0d9e5f9 100644
@@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
        struct hd_struct *p = dev_to_part(dev);
+       blk_free_devt(dev->devt);
        free_part_stats(p);
        free_part_info(p);
        kfree(p);
@@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
        rcu_assign_pointer(ptbl->last_lookup, NULL);
        kobject_put(part->holder_dir);
        device_del(part_to_dev(part));
-       blk_free_devt(part_devt(part));
 
        hd_struct_put(part);
 }
drivers/block/mtip32xx/mtip32xx.c
index db1e956..5c8e7fe 100644
@@ -3918,7 +3918,6 @@ skip_create_disk:
        if (rv) {
                dev_err(&dd->pdev->dev,
                        "Unable to allocate request queue\n");
-               rv = -ENOMEM;
                goto block_queue_alloc_init_error;
        }
 
drivers/block/null_blk.c
index a3b042c..00d469c 100644
@@ -462,17 +462,21 @@ static int null_add_dev(void)
        struct gendisk *disk;
        struct nullb *nullb;
        sector_t size;
+       int rv;
 
        nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
-       if (!nullb)
+       if (!nullb) {
+               rv = -ENOMEM;
                goto out;
+       }
 
        spin_lock_init(&nullb->lock);
 
        if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
                submit_queues = nr_online_nodes;
 
-       if (setup_queues(nullb))
+       rv = setup_queues(nullb);
+       if (rv)
                goto out_free_nullb;
 
        if (queue_mode == NULL_Q_MQ) {
@@ -484,22 +488,29 @@ static int null_add_dev(void)
                nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
                nullb->tag_set.driver_data = nullb;
 
-               if (blk_mq_alloc_tag_set(&nullb->tag_set))
+               rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+               if (rv)
                        goto out_cleanup_queues;
 
                nullb->q = blk_mq_init_queue(&nullb->tag_set);
-               if (!nullb->q)
+               if (!nullb->q) {
+                       rv = -ENOMEM;
                        goto out_cleanup_tags;
+               }
        } else if (queue_mode == NULL_Q_BIO) {
                nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
-               if (!nullb->q)
+               if (!nullb->q) {
+                       rv = -ENOMEM;
                        goto out_cleanup_queues;
+               }
                blk_queue_make_request(nullb->q, null_queue_bio);
                init_driver_queues(nullb);
        } else {
                nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
-               if (!nullb->q)
+               if (!nullb->q) {
+                       rv = -ENOMEM;
                        goto out_cleanup_queues;
+               }
                blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
                blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
                init_driver_queues(nullb);
@@ -509,8 +520,10 @@ static int null_add_dev(void)
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 
        disk = nullb->disk = alloc_disk_node(1, home_node);
-       if (!disk)
+       if (!disk) {
+               rv = -ENOMEM;
                goto out_cleanup_blk_queue;
+       }
 
        mutex_lock(&lock);
        list_add_tail(&nullb->list, &nullb_list);
@@ -544,7 +557,7 @@ out_cleanup_queues:
 out_free_nullb:
        kfree(nullb);
 out:
-       return -ENOMEM;
+       return rv;
 }
 
 static int __init null_init(void)