Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / drivers / md / raid1.c
index 967a4ed..4517f06 100644 (file)
@@ -255,9 +255,10 @@ static void call_bio_endio(struct r1bio *r1_bio)
                done = 1;
 
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
+
        if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                /*
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
@@ -312,9 +313,9 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
        return mirror;
 }
 
-static void raid1_end_read_request(struct bio *bio, int error)
+static void raid1_end_read_request(struct bio *bio)
 {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        int mirror;
        struct r1conf *conf = r1_bio->mddev->private;
@@ -397,9 +398,8 @@ static void r1_bio_write_done(struct r1bio *r1_bio)
        }
 }
 
-static void raid1_end_write_request(struct bio *bio, int error)
+static void raid1_end_write_request(struct bio *bio)
 {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r1bio *r1_bio = bio->bi_private;
        int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
        struct r1conf *conf = r1_bio->mddev->private;
@@ -410,7 +410,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (!uptodate) {
+       if (bio->bi_error) {
                set_bit(WriteErrorSeen,
                        &conf->mirrors[mirror].rdev->flags);
                if (!test_and_set_bit(WantReplacement,
@@ -557,7 +557,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
-                   || test_bit(Unmerged, &rdev->flags)
                    || test_bit(Faulty, &rdev->flags))
                        continue;
                if (!test_bit(In_sync, &rdev->flags) &&
@@ -708,38 +707,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        return best_disk;
 }
 
-static int raid1_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
-{
-       struct r1conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max = biovec->bv_len;
-
-       if (mddev->merge_check_needed) {
-               int disk;
-               rcu_read_lock();
-               for (disk = 0; disk < conf->raid_disks * 2; disk++) {
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = sector +
-                                               rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
-
-}
-
 static int raid1_congested(struct mddev *mddev, int bits)
 {
        struct r1conf *conf = mddev->private;
@@ -793,7 +760,7 @@ static void flush_pending_writes(struct r1conf *conf)
                        if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        else
                                generic_make_request(bio);
                        bio = next;
@@ -1068,7 +1035,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
                if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                        /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                else
                        generic_make_request(bio);
                bio = next;
@@ -1158,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         * non-zero, then it is the number of not-completed requests.
         */
        bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
 
        if (rw == READ) {
                /*
@@ -1269,8 +1236,7 @@ read_again:
                        break;
                }
                r1_bio->bios[i] = NULL;
-               if (!rdev || test_bit(Faulty, &rdev->flags)
-                   || test_bit(Unmerged, &rdev->flags)) {
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
                        if (i < conf->raid_disks)
                                set_bit(R1BIO_Degraded, &r1_bio->state);
                        continue;
@@ -1508,6 +1474,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
         */
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       set_bit(MD_CHANGE_PENDING, &mddev->flags);
        printk(KERN_ALERT
               "md/raid1:%s: Disk failure on %s, disabling device.\n"
               "md/raid1:%s: Operation continuing on %d devices.\n",
@@ -1617,7 +1584,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        struct raid1_info *p;
        int first = 0;
        int last = conf->raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
 
        if (mddev->recovery_disabled == conf->recovery_disabled)
                return -EBUSY;
@@ -1625,11 +1591,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
 
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
-
        for (mirror = first; mirror <= last; mirror++) {
                p = conf->mirrors+mirror;
                if (!p->rdev) {
@@ -1661,19 +1622,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                        break;
                }
        }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@ -1737,7 +1685,7 @@ abort:
        return err;
 }
 
-static void end_sync_read(struct bio *bio, int error)
+static void end_sync_read(struct bio *bio)
 {
        struct r1bio *r1_bio = bio->bi_private;
 
@@ -1748,16 +1696,16 @@ static void end_sync_read(struct bio *bio, int error)
         * or re-read if the read failed.
         * We don't do much here, just schedule handling by raid1d
         */
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                set_bit(R1BIO_Uptodate, &r1_bio->state);
 
        if (atomic_dec_and_test(&r1_bio->remaining))
                reschedule_retry(r1_bio);
 }
 
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_write(struct bio *bio)
 {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
@@ -1944,7 +1892,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
                idx ++;
        }
        set_bit(R1BIO_Uptodate, &r1_bio->state);
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       bio->bi_error = 0;
        return 1;
 }
 
@@ -1968,15 +1916,14 @@ static void process_checks(struct r1bio *r1_bio)
        for (i = 0; i < conf->raid_disks * 2; i++) {
                int j;
                int size;
-               int uptodate;
+               int error;
                struct bio *b = r1_bio->bios[i];
                if (b->bi_end_io != end_sync_read)
                        continue;
-               /* fixup the bio for reuse, but preserve BIO_UPTODATE */
-               uptodate = test_bit(BIO_UPTODATE, &b->bi_flags);
+               /* fixup the bio for reuse, but preserve errno */
+               error = b->bi_error;
                bio_reset(b);
-               if (!uptodate)
-                       clear_bit(BIO_UPTODATE, &b->bi_flags);
+               b->bi_error = error;
                b->bi_vcnt = vcnt;
                b->bi_iter.bi_size = r1_bio->sectors << 9;
                b->bi_iter.bi_sector = r1_bio->sector +
@@ -1999,7 +1946,7 @@ static void process_checks(struct r1bio *r1_bio)
        }
        for (primary = 0; primary < conf->raid_disks * 2; primary++)
                if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                   test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+                   !r1_bio->bios[primary]->bi_error) {
                        r1_bio->bios[primary]->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
                        break;
@@ -2009,14 +1956,14 @@ static void process_checks(struct r1bio *r1_bio)
                int j;
                struct bio *pbio = r1_bio->bios[primary];
                struct bio *sbio = r1_bio->bios[i];
-               int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
+               int error = sbio->bi_error;
 
                if (sbio->bi_end_io != end_sync_read)
                        continue;
-               /* Now we can 'fixup' the BIO_UPTODATE flag */
-               set_bit(BIO_UPTODATE, &sbio->bi_flags);
+               /* Now we can 'fixup' the error value */
+               sbio->bi_error = 0;
 
-               if (uptodate) {
+               if (!error) {
                        for (j = vcnt; j-- ; ) {
                                struct page *p, *s;
                                p = pbio->bi_io_vec[j].bv_page;
@@ -2031,7 +1978,7 @@ static void process_checks(struct r1bio *r1_bio)
                if (j >= 0)
                        atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                             && uptodate)) {
+                             && !error)) {
                        /* No need to write to this device. */
                        sbio->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2272,11 +2219,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
                struct bio *bio = r1_bio->bios[m];
                if (bio->bi_end_io == NULL)
                        continue;
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (!bio->bi_error &&
                    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
                        rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
                }
-               if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (bio->bi_error &&
                    test_bit(R1BIO_WriteError, &r1_bio->state)) {
                        if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
                                md_error(conf->mddev, rdev);
@@ -2289,6 +2236,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 {
        int m;
+       bool fail = false;
        for (m = 0; m < conf->raid_disks * 2 ; m++)
                if (r1_bio->bios[m] == IO_MADE_GOOD) {
                        struct md_rdev *rdev = conf->mirrors[m].rdev;
@@ -2301,6 +2249,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
                         * narrow down and record precise write
                         * errors.
                         */
+                       fail = true;
                        if (!narrow_write_error(r1_bio, m)) {
                                md_error(conf->mddev,
                                         conf->mirrors[m].rdev);
@@ -2312,7 +2261,13 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
                }
        if (test_bit(R1BIO_WriteError, &r1_bio->state))
                close_write(r1_bio);
-       raid_end_bio_io(r1_bio);
+       if (fail) {
+               spin_lock_irq(&conf->device_lock);
+               list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
+               spin_unlock_irq(&conf->device_lock);
+               md_wakeup_thread(conf->mddev->thread);
+       } else
+               raid_end_bio_io(r1_bio);
 }
 
 static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
@@ -2418,6 +2373,23 @@ static void raid1d(struct md_thread *thread)
 
        md_check_recovery(mddev);
 
+       if (!list_empty_careful(&conf->bio_end_io_list) &&
+           !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+               LIST_HEAD(tmp);
+               spin_lock_irqsave(&conf->device_lock, flags);
+               if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+                       list_add(&tmp, &conf->bio_end_io_list);
+                       list_del_init(&conf->bio_end_io_list); /* entries now hang off tmp */
+               }
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+               while (!list_empty(&tmp)) { /* drain tmp, not the now-empty conf list */
+                       r1_bio = list_first_entry(&tmp, struct r1bio,
+                                                 retry_list);
+                       list_del(&r1_bio->retry_list);
+                       raid_end_bio_io(r1_bio);
+               }
+       }
+
        blk_start_plug(&plug);
        for (;;) {
 
@@ -2715,7 +2687,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                                                /* remove last page from this bio */
                                                bio->bi_vcnt--;
                                                bio->bi_iter.bi_size -= len;
-                                               __clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+                                               bio_clear_flag(bio, BIO_SEG_VALID);
                                        }
                                        goto bio_full;
                                }
@@ -2810,8 +2782,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
                        goto abort;
                disk->rdev = rdev;
                q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
 
                disk->head_position = 0;
                disk->seq_start = MaxSector;
@@ -2819,6 +2789,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->raid_disks = mddev->raid_disks;
        conf->mddev = mddev;
        INIT_LIST_HEAD(&conf->retry_list);
+       INIT_LIST_HEAD(&conf->bio_end_io_list);
 
        spin_lock_init(&conf->resync_lock);
        init_waitqueue_head(&conf->wait_barrier);
@@ -3113,6 +3084,7 @@ static int raid1_reshape(struct mddev *mddev)
 
        unfreeze_array(conf);
 
+       set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
 
@@ -3176,7 +3148,6 @@ static struct md_personality raid1_personality =
        .quiesce        = raid1_quiesce,
        .takeover       = raid1_takeover,
        .congested      = raid1_congested,
-       .mergeable_bvec = raid1_mergeable_bvec,
 };
 
 static int __init raid_init(void)