Merge branch 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 10 Aug 2010 22:22:42 +0000 (15:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 10 Aug 2010 22:22:42 +0000 (15:22 -0700)
* 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block: (149 commits)
  block: make sure that REQ_* types are seen even with CONFIG_BLOCK=n
  xen-blkfront: fix missing out label
  blkdev: fix blkdev_issue_zeroout return value
  block: update request stacking methods to support discards
  block: fix missing export of blk_types.h
  writeback: fix bad _bh spinlock nesting
  drbd: revert "delay probes", feature is being re-implemented differently
  drbd: Initialize all members of sync_conf to their defaults [Bugz 315]
  drbd: Disable delay probes for the upcomming release
  writeback: cleanup bdi_register
  writeback: add new tracepoints
  writeback: remove unnecessary init_timer call
  writeback: optimize periodic bdi thread wakeups
  writeback: prevent unnecessary bdi threads wakeups
  writeback: move bdi threads exiting logic to the forker thread
  writeback: restructure bdi forker loop a little
  writeback: move last_active to bdi
  writeback: do not remove bdi from bdi_list
  writeback: simplify bdi code a little
  writeback: do not lose wake-ups in bdi threads
  ...

Fixed up pretty trivial conflicts in drivers/block/virtio_blk.c and
drivers/scsi/scsi_error.c as per Jens.

154 files changed:
arch/alpha/include/asm/scatterlist.h
arch/avr32/include/asm/scatterlist.h
arch/blackfin/include/asm/scatterlist.h
arch/cris/include/asm/scatterlist.h
arch/frv/include/asm/scatterlist.h
arch/h8300/include/asm/scatterlist.h
arch/ia64/include/asm/scatterlist.h
arch/m32r/include/asm/scatterlist.h
arch/m68k/include/asm/scatterlist.h
arch/microblaze/include/asm/scatterlist.h
arch/mips/include/asm/scatterlist.h
arch/mn10300/include/asm/scatterlist.h
arch/parisc/include/asm/scatterlist.h
arch/powerpc/include/asm/scatterlist.h
arch/s390/include/asm/scatterlist.h
arch/score/include/asm/scatterlist.h
arch/sh/include/asm/scatterlist.h
arch/sparc/include/asm/scatterlist.h
arch/um/drivers/ubd_kern.c
arch/x86/include/asm/scatterlist.h
arch/xtensa/include/asm/scatterlist.h
block/blk-barrier.c
block/blk-core.c
block/blk-exec.c
block/blk-lib.c
block/blk-map.c
block/blk-merge.c
block/blk-settings.c
block/blk-sysfs.c
block/blk.h
block/cfq-iosched.c
block/compat_ioctl.c
block/elevator.c
block/ioctl.c
drivers/ata/libata-scsi.c
drivers/block/DAC960.c
drivers/block/amiflop.c
drivers/block/aoe/aoeblk.c
drivers/block/ataflop.c
drivers/block/brd.c
drivers/block/cciss.c
drivers/block/cciss.h
drivers/block/cciss_cmd.h
drivers/block/cciss_scsi.c
drivers/block/cpqarray.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_proc.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/hd.c
drivers/block/loop.c
drivers/block/mg_disk.c
drivers/block/nbd.c
drivers/block/osdblk.c
drivers/block/paride/pcd.c
drivers/block/paride/pd.c
drivers/block/paride/pf.c
drivers/block/pktcdvd.c
drivers/block/ps3disk.c
drivers/block/swim.c
drivers/block/swim3.c
drivers/block/ub.c
drivers/block/umem.c
drivers/block/viodasd.c
drivers/block/virtio_blk.c
drivers/block/xd.c
drivers/block/xen-blkfront.c
drivers/block/xsysace.c
drivers/block/z2ram.c
drivers/cdrom/cdrom.c
drivers/cdrom/gdrom.c
drivers/cdrom/viocd.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-disk.c
drivers/ide/ide-disk_ioctl.c
drivers/ide/ide-eh.c
drivers/ide/ide-floppy.c
drivers/ide/ide-floppy_ioctl.c
drivers/ide/ide-gd.c
drivers/ide/ide-io.c
drivers/ide/ide-pm.c
drivers/ide/ide-tape.c
drivers/md/dm-io.c
drivers/md/dm-kcopyd.c
drivers/md/dm-raid1.c
drivers/md/dm-stripe.c
drivers/md/dm.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/memstick/core/mspro_block.c
drivers/message/i2o/i2o_block.c
drivers/mmc/card/block.c
drivers/mmc/card/queue.c
drivers/mtd/mtd_blkdevs.c
drivers/s390/block/dasd.c
drivers/s390/block/dcssblk.c
drivers/s390/char/tape_block.c
drivers/scsi/aha1542.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd.c
drivers/scsi/sd.h
drivers/scsi/sr.c
drivers/scsi/sun3_NCR5380.c
drivers/scsi/sun3_scsi.c
drivers/scsi/sun3_scsi_vme.c
drivers/staging/hv/blkvsc_drv.c
drivers/xen/xenbus/xenbus_client.c
fs/bio.c
fs/block_dev.c
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/btrfs/volumes.c
fs/coda/psdev.c
fs/coda/upcall.c
fs/exofs/ios.c
fs/fs-writeback.c
fs/gfs2/log.c
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/nilfs2/segbuf.c
fs/splice.c
include/linux/Kbuild
include/linux/audit.h
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blk_types.h [new file with mode: 0644]
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/coda_psdev.h
include/linux/drbd.h
include/linux/drbd_nl.h
include/linux/fs.h
include/trace/events/block.h
include/trace/events/writeback.h [new file with mode: 0644]
kernel/power/block_io.c
kernel/trace/blktrace.c
mm/backing-dev.c
mm/page-writeback.c
mm/page_io.c

index 5728c52..017d747 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
-
 #endif /* !(_ALPHA_SCATTERLIST_H) */
index 06394e5..a5902d9 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0xffffffff)
-
 #endif /* __ASM_AVR32_SCATTERLIST_H */
index 64d41d3..d177a15 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD      (0xffffffff)
-
 #endif                         /* !(_BLACKFIN_SCATTERLIST_H) */
index 249a784..f11f8f4 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x1fffffff)
-
 #endif /* !(__ASM_CRIS_SCATTERLIST_H) */
index 1614bfd..0e5eb30 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0xffffffffUL)
-
 #endif /* !_ASM_SCATTERLIST_H */
index de08a4a..82130ed 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD      (0xffffffff)
-
 #endif /* !(_H8300_SCATTERLIST_H) */
index f299a4f..08fd93b 100644 (file)
@@ -2,15 +2,6 @@
 #define _ASM_IA64_SCATTERLIST_H
 
 #include <asm-generic/scatterlist.h>
-/*
- * It used to be that ISA_DMA_THRESHOLD had something to do with the
- * DMA-limits of ISA-devices.  Nowadays, its only remaining use (apart
- * from the aha1542.c driver, which isn't 64-bit clean anyhow) is to
- * tell the block-layer (via BLK_BOUNCE_ISA) what the max. physical
- * address of a page is that is allocated with GFP_DMA.  On IA-64,
- * that's 4GB - 1.
- */
-#define ISA_DMA_THRESHOLD      0xffffffff
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_IA64_SCATTERLIST_H */
index aeeddd8..7370b8b 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x1fffffff)
-
 #endif /* _ASM_M32R_SCATTERLIST_H */
index 175da06..3125054 100644 (file)
@@ -3,7 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-/* This is bogus and should go away. */
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
 #endif /* !(_M68K_SCATTERLIST_H) */
index dc4a890..35d786f 100644 (file)
@@ -1,3 +1 @@
 #include <asm-generic/scatterlist.h>
-
-#define ISA_DMA_THRESHOLD      (~0UL)
index 9af65e7..7ee0e64 100644 (file)
@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffffUL)
-
 #endif /* __ASM_SCATTERLIST_H */
index 7bd00b9..7baa400 100644 (file)
@@ -13,6 +13,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
 #endif /* _ASM_SCATTERLIST_H */
index 2c3b79b..8bf1f0d 100644 (file)
@@ -5,7 +5,6 @@
 #include <asm/types.h>
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
 #define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
 
 #endif /* _ASM_PARISC_SCATTERLIST_H */
index 34cc78f..de1f620 100644 (file)
@@ -12,9 +12,6 @@
 #include <asm/dma.h>
 #include <asm-generic/scatterlist.h>
 
-#ifdef __powerpc64__
-#define ISA_DMA_THRESHOLD      (~0UL)
-#endif
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_POWERPC_SCATTERLIST_H */
index be44d94..35d786f 100644 (file)
@@ -1,3 +1 @@
-#define ISA_DMA_THRESHOLD      (~0UL)
-
 #include <asm-generic/scatterlist.h>
index 4fa1a66..9f533b8 100644 (file)
@@ -1,8 +1,6 @@
 #ifndef _ASM_SCORE_SCATTERLIST_H
 #define _ASM_SCORE_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD      (~0UL)
-
 #include <asm-generic/scatterlist.h>
 
 #endif /* _ASM_SCORE_SCATTERLIST_H */
index e38d1d4..98dfc35 100644 (file)
@@ -1,8 +1,6 @@
 #ifndef __ASM_SH_SCATTERLIST_H
 #define __ASM_SH_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD      phys_addr_mask()
-
 #include <asm-generic/scatterlist.h>
 
 #endif /* __ASM_SH_SCATTERLIST_H */
index 433e45f..92bb638 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD      (~0UL)
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* !(_SPARC_SCATTERLIST_H) */
index da992a3..1bcd208 100644 (file)
@@ -33,6 +33,7 @@
 #include "linux/mm.h"
 #include "linux/slab.h"
 #include "linux/vmalloc.h"
+#include "linux/smp_lock.h"
 #include "linux/blkpg.h"
 #include "linux/genhd.h"
 #include "linux/spinlock.h"
@@ -1098,6 +1099,7 @@ static int ubd_open(struct block_device *bdev, fmode_t mode)
        struct ubd *ubd_dev = disk->private_data;
        int err = 0;
 
+       lock_kernel();
        if(ubd_dev->count == 0){
                err = ubd_open_dev(ubd_dev);
                if(err){
@@ -1115,7 +1117,8 @@ static int ubd_open(struct block_device *bdev, fmode_t mode)
                if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
                err = -EROFS;
        }*/
- out:
+out:
+       unlock_kernel();
        return err;
 }
 
@@ -1123,8 +1126,10 @@ static int ubd_release(struct gendisk *disk, fmode_t mode)
 {
        struct ubd *ubd_dev = disk->private_data;
 
+       lock_kernel();
        if(--ubd_dev->count == 0)
                ubd_close_dev(ubd_dev);
+       unlock_kernel();
        return 0;
 }
 
index fb0b187..4240878 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffff)
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_X86_SCATTERLIST_H */
index b1f9fdc..a0421a6 100644 (file)
@@ -13,6 +13,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
-
 #endif /* _XTENSA_SCATTERLIST_H */
index 0d710c9..f0faefc 100644 (file)
@@ -13,7 +13,6 @@
  * blk_queue_ordered - does this queue support ordered writes
  * @q:        the request queue
  * @ordered:  one of QUEUE_ORDERED_*
- * @prepare_flush_fn: rq setup helper for cache flush ordered writes
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
  *   feature should call this function and indicate so.
  *
  **/
-int blk_queue_ordered(struct request_queue *q, unsigned ordered,
-                     prepare_flush_fn *prepare_flush_fn)
+int blk_queue_ordered(struct request_queue *q, unsigned ordered)
 {
-       if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
-                                            QUEUE_ORDERED_DO_POSTFLUSH))) {
-               printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
-               return -EINVAL;
-       }
-
        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
@@ -44,7 +36,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 
        q->ordered = ordered;
        q->next_ordered = ordered;
-       q->prepare_flush_fn = prepare_flush_fn;
 
        return 0;
 }
@@ -79,7 +70,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
-       if (!blk_fs_request(rq))
+       if (rq->cmd_type != REQ_TYPE_FS)
                return QUEUE_ORDSEQ_DRAIN;
 
        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
@@ -143,10 +134,10 @@ static void queue_flush(struct request_queue *q, unsigned which)
        }
 
        blk_rq_init(q, rq);
-       rq->cmd_flags = REQ_HARDBARRIER;
-       rq->rq_disk = q->bar_rq.rq_disk;
+       rq->cmd_type = REQ_TYPE_FS;
+       rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
+       rq->rq_disk = q->orig_bar_rq->rq_disk;
        rq->end_io = end_io;
-       q->prepare_flush_fn(q, rq);
 
        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
@@ -203,7 +194,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-                       rq->cmd_flags |= REQ_RW;
+                       rq->cmd_flags |= REQ_WRITE;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -236,7 +227,8 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
        struct request *rq = *rqp;
-       const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+       const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
+                               (rq->cmd_flags & REQ_HARDBARRIER);
 
        if (!q->ordseq) {
                if (!is_barrier)
@@ -261,7 +253,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
         */
 
        /* Special requests are not subject to ordering rules. */
-       if (!blk_fs_request(rq) &&
+       if (rq->cmd_type != REQ_TYPE_FS &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;
 
@@ -319,6 +311,15 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
        if (!q)
                return -ENXIO;
 
+       /*
+        * some block devices may not have their queue correctly set up here
+        * (e.g. loop device without a backing file) and so issuing a flush
+        * here will panic. Ensure there is a request function before issuing
+        * the barrier.
+        */
+       if (!q->make_request_fn)
+               return -ENXIO;
+
        bio = bio_alloc(gfp_mask, 0);
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_bdev = bdev;
index f0640d7..7da630e 100644 (file)
@@ -184,7 +184,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
        printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
               rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 
-       if (blk_pc_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                printk(KERN_INFO "  cdb: ");
                for (bit = 0; bit < BLK_MAX_CDB; bit++)
                        printk("%02x ", rq->cmd[bit]);
@@ -608,6 +608,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 
        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;
+       q->unprep_rq_fn         = NULL;
        q->unplug_fn            = generic_unplug_device;
        q->queue_flags          = QUEUE_FLAG_DEFAULT;
        q->queue_lock           = lock;
@@ -1135,30 +1136,46 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);
 
+/**
+ * blk_add_request_payload - add a payload to a request
+ * @rq: request to update
+ * @page: page backing the payload
+ * @len: length of the payload.
+ *
+ * This allows to later add a payload to an already submitted request by
+ * a block driver.  The driver needs to take care of freeing the payload
+ * itself.
+ *
+ * Note that this is a quite horrible hack and nothing but handling of
+ * discard requests should ever use it.
+ */
+void blk_add_request_payload(struct request *rq, struct page *page,
+               unsigned int len)
+{
+       struct bio *bio = rq->bio;
+
+       bio->bi_io_vec->bv_page = page;
+       bio->bi_io_vec->bv_offset = 0;
+       bio->bi_io_vec->bv_len = len;
+
+       bio->bi_size = len;
+       bio->bi_vcnt = 1;
+       bio->bi_phys_segments = 1;
+
+       rq->__data_len = rq->resid_len = len;
+       rq->nr_phys_segments = 1;
+       rq->buffer = bio_data(bio);
+}
+EXPORT_SYMBOL_GPL(blk_add_request_payload);
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
        req->cpu = bio->bi_comp_cpu;
        req->cmd_type = REQ_TYPE_FS;
 
-       /*
-        * Inherit FAILFAST from bio (for read-ahead, and explicit
-        * FAILFAST).  FAILFAST flags are identical for req and bio.
-        */
-       if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+       req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
+       if (bio->bi_rw & REQ_RAHEAD)
                req->cmd_flags |= REQ_FAILFAST_MASK;
-       else
-               req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
-
-       if (bio_rw_flagged(bio, BIO_RW_DISCARD))
-               req->cmd_flags |= REQ_DISCARD;
-       if (bio_rw_flagged(bio, BIO_RW_BARRIER))
-               req->cmd_flags |= REQ_HARDBARRIER;
-       if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
-               req->cmd_flags |= REQ_RW_SYNC;
-       if (bio_rw_flagged(bio, BIO_RW_META))
-               req->cmd_flags |= REQ_RW_META;
-       if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
-               req->cmd_flags |= REQ_NOIDLE;
 
        req->errors = 0;
        req->__sector = bio->bi_sector;
@@ -1181,12 +1198,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
        int el_ret;
        unsigned int bytes = bio->bi_size;
        const unsigned short prio = bio_prio(bio);
-       const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
-       const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
+       const bool sync = (bio->bi_rw & REQ_SYNC);
+       const bool unplug = (bio->bi_rw & REQ_UNPLUG);
        const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
        int rw_flags;
 
-       if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
+       if ((bio->bi_rw & REQ_HARDBARRIER) &&
            (q->next_ordered == QUEUE_ORDERED_NONE)) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
@@ -1200,7 +1217,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
        spin_lock_irq(q->queue_lock);
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
+       if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
                goto get_rq;
 
        el_ret = elv_merge(q, &req, bio);
@@ -1275,7 +1292,7 @@ get_rq:
         */
        rw_flags = bio_data_dir(bio);
        if (sync)
-               rw_flags |= REQ_RW_SYNC;
+               rw_flags |= REQ_SYNC;
 
        /*
         * Grab a free request. This is might sleep but can not fail.
@@ -1464,7 +1481,7 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
                }
 
-               if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+               if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
                             nr_sectors > queue_max_hw_sectors(q))) {
                        printk(KERN_ERR "bio too big device %s (%u > %u)\n",
                               bdevname(bio->bi_bdev, b),
@@ -1497,8 +1514,7 @@ static inline void __generic_make_request(struct bio *bio)
                if (bio_check_eod(bio, nr_sectors))
                        goto end_io;
 
-               if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-                   !blk_queue_discard(q)) {
+               if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)) {
                        err = -EOPNOTSUPP;
                        goto end_io;
                }
@@ -1583,7 +1599,7 @@ void submit_bio(int rw, struct bio *bio)
         * If it's a regular read/write or a barrier with data attached,
         * go through the normal accounting stuff before submission.
         */
-       if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) {
+       if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
                if (rw & WRITE) {
                        count_vm_events(PGPGOUT, count);
                } else {
@@ -1628,6 +1644,9 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
+       if (rq->cmd_flags & REQ_DISCARD)
+               return 0;
+
        if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
            blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
                printk(KERN_ERR "%s: over max size limit.\n", __func__);
@@ -1796,7 +1815,7 @@ struct request *blk_peek_request(struct request_queue *q)
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
-                       if (blk_sorted_rq(rq))
+                       if (rq->cmd_flags & REQ_SORTED)
                                elv_activate_rq(q, rq);
 
                        /*
@@ -1984,10 +2003,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
         * TODO: tj: This is too subtle.  It would be better to let
         * low level drivers do what they see fit.
         */
-       if (blk_fs_request(req))
+       if (req->cmd_type == REQ_TYPE_FS)
                req->errors = 0;
 
-       if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
+       if (error && req->cmd_type == REQ_TYPE_FS &&
+           !(req->cmd_flags & REQ_QUIET)) {
                printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
                                req->rq_disk ? req->rq_disk->disk_name : "?",
                                (unsigned long long)blk_rq_pos(req));
@@ -2074,7 +2094,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
        req->buffer = bio_data(req->bio);
 
        /* update sector only for requests with clear definition of sector */
-       if (blk_fs_request(req) || blk_discard_rq(req))
+       if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
                req->__sector += total_bytes >> 9;
 
        /* mixed attributes always follow the first bio */
@@ -2111,11 +2131,32 @@ static bool blk_update_bidi_request(struct request *rq, int error,
            blk_update_request(rq->next_rq, error, bidi_bytes))
                return true;
 
-       add_disk_randomness(rq->rq_disk);
+       if (blk_queue_add_random(rq->q))
+               add_disk_randomness(rq->rq_disk);
 
        return false;
 }
 
+/**
+ * blk_unprep_request - unprepare a request
+ * @req:       the request
+ *
+ * This function makes a request ready for complete resubmission (or
+ * completion).  It happens only after all error handling is complete,
+ * so represents the appropriate moment to deallocate any resources
+ * that were allocated to the request in the prep_rq_fn.  The queue
+ * lock is held when calling this.
+ */
+void blk_unprep_request(struct request *req)
+{
+       struct request_queue *q = req->q;
+
+       req->cmd_flags &= ~REQ_DONTPREP;
+       if (q->unprep_rq_fn)
+               q->unprep_rq_fn(q, req);
+}
+EXPORT_SYMBOL_GPL(blk_unprep_request);
+
 /*
  * queue lock must be held
  */
@@ -2126,11 +2167,15 @@ static void blk_finish_request(struct request *req, int error)
 
        BUG_ON(blk_queued_rq(req));
 
-       if (unlikely(laptop_mode) && blk_fs_request(req))
+       if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
                laptop_io_completion(&req->q->backing_dev_info);
 
        blk_delete_timer(req);
 
+       if (req->cmd_flags & REQ_DONTPREP)
+               blk_unprep_request(req);
+
+
        blk_account_io_done(req);
 
        if (req->end_io)
@@ -2363,7 +2408,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                     struct bio *bio)
 {
        /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
-       rq->cmd_flags |= bio->bi_rw & REQ_RW;
+       rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
 
        if (bio_has_data(bio)) {
                rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -2450,6 +2495,8 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
        dst->cpu = src->cpu;
        dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+       if (src->cmd_flags & REQ_DISCARD)
+               dst->cmd_flags |= REQ_DISCARD;
        dst->cmd_type = src->cmd_type;
        dst->__sector = blk_rq_pos(src);
        dst->__data_len = blk_rq_bytes(src);
index 49557e9..e1672f1 100644 (file)
@@ -57,7 +57,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        __elv_add_request(q, rq, where, 1);
        __generic_unplug_device(q);
        /* the queue is stopped so it won't be plugged+unplugged */
-       if (blk_pm_resume_request(rq))
+       if (rq->cmd_type == REQ_TYPE_PM_RESUME)
                q->request_fn(q);
        spin_unlock_irq(q->queue_lock);
 }
index d0216b9..c1fc55a 100644 (file)
@@ -19,7 +19,6 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 
        if (bio->bi_private)
                complete(bio->bi_private);
-       __free_page(bio_page(bio));
 
        bio_put(bio);
 }
@@ -42,8 +41,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        struct request_queue *q = bdev_get_queue(bdev);
        int type = flags & BLKDEV_IFL_BARRIER ?
                DISCARD_BARRIER : DISCARD_NOBARRIER;
+       unsigned int max_discard_sectors;
        struct bio *bio;
-       struct page *page;
        int ret = 0;
 
        if (!q)
@@ -52,36 +51,30 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
 
-       while (nr_sects && !ret) {
-               unsigned int sector_size = q->limits.logical_block_size;
-               unsigned int max_discard_sectors =
-                       min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+       /*
+        * Ensure that max_discard_sectors is of the proper
+        * granularity
+        */
+       max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+       if (q->limits.discard_granularity) {
+               unsigned int disc_sects = q->limits.discard_granularity >> 9;
 
+               max_discard_sectors &= ~(disc_sects - 1);
+       }
+
+       while (nr_sects && !ret) {
                bio = bio_alloc(gfp_mask, 1);
-               if (!bio)
-                       goto out;
+               if (!bio) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
                bio->bi_sector = sector;
                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;
                if (flags & BLKDEV_IFL_WAIT)
                        bio->bi_private = &wait;
 
-               /*
-                * Add a zeroed one-sector payload as that's what
-                * our current implementations need.  If we'll ever need
-                * more the interface will need revisiting.
-                */
-               page = alloc_page(gfp_mask | __GFP_ZERO);
-               if (!page)
-                       goto out_free_bio;
-               if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
-                       goto out_free_page;
-
-               /*
-                * And override the bio size - the way discard works we
-                * touch many more blocks on disk than the actual payload
-                * length.
-                */
                if (nr_sects > max_discard_sectors) {
                        bio->bi_size = max_discard_sectors << 9;
                        nr_sects -= max_discard_sectors;
@@ -103,13 +96,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                        ret = -EIO;
                bio_put(bio);
        }
+
        return ret;
-out_free_page:
-       __free_page(page);
-out_free_bio:
-       bio_put(bio);
-out:
-       return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
@@ -157,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err)
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                        sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
-       int ret = 0;
+       int ret;
        struct bio *bio;
        struct bio_batch bb;
        unsigned int sz, issued = 0;
@@ -175,11 +163,14 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                        return ret;
        }
 submit:
+       ret = 0;
        while (nr_sects != 0) {
                bio = bio_alloc(gfp_mask,
                                min(nr_sects, (sector_t)BIO_MAX_PAGES));
-               if (!bio)
+               if (!bio) {
+                       ret = -ENOMEM;
                        break;
+               }
 
                bio->bi_sector = sector;
                bio->bi_bdev   = bdev;
@@ -198,6 +189,7 @@ submit:
                        if (ret < (sz << 9))
                                break;
                }
+               ret = 0;
                issued++;
                submit_bio(WRITE, bio);
        }
index 9083cf0..c65d759 100644 (file)
@@ -307,7 +307,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
                return PTR_ERR(bio);
 
        if (rq_data_dir(rq) == WRITE)
-               bio->bi_rw |= (1 << BIO_RW);
+               bio->bi_rw |= (1 << REQ_WRITE);
 
        if (do_copy)
                rq->cmd_flags |= REQ_COPY_USER;
index 5e7dc99..3b0cd42 100644 (file)
@@ -12,7 +12,6 @@
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
                                             struct bio *bio)
 {
-       unsigned int phys_size;
        struct bio_vec *bv, *bvprv = NULL;
        int cluster, i, high, highprv = 1;
        unsigned int seg_size, nr_phys_segs;
@@ -24,7 +23,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
        fbio = bio;
        cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
        seg_size = 0;
-       phys_size = nr_phys_segs = 0;
+       nr_phys_segs = 0;
        for_each_bio(bio) {
                bio_for_each_segment(bv, bio, i) {
                        /*
@@ -180,7 +179,7 @@ new_segment:
        }
 
        if (q->dma_drain_size && q->dma_drain_needed(rq)) {
-               if (rq->cmd_flags & REQ_RW)
+               if (rq->cmd_flags & REQ_WRITE)
                        memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
                sg->page_link &= ~0x02;
@@ -226,7 +225,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 {
        unsigned short max_sectors;
 
-       if (unlikely(blk_pc_request(req)))
+       if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
                max_sectors = queue_max_hw_sectors(q);
        else
                max_sectors = queue_max_sectors(q);
@@ -250,7 +249,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 {
        unsigned short max_sectors;
 
-       if (unlikely(blk_pc_request(req)))
+       if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
                max_sectors = queue_max_hw_sectors(q);
        else
                max_sectors = queue_max_sectors(q);
index f5ed5a1..a234f4b 100644 (file)
@@ -36,6 +36,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
 }
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
+/**
+ * blk_queue_unprep_rq - set an unprepare_request function for queue
+ * @q:         queue
+ * @ufn:       unprepare_request function
+ *
+ * It's possible for a queue to register an unprepare_request callback
+ * which is invoked before the request is finally completed. The goal
+ * of the function is to deallocate any data that was allocated in the
+ * prepare_request callback.
+ *
+ */
+void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
+{
+       q->unprep_rq_fn = ufn;
+}
+EXPORT_SYMBOL(blk_queue_unprep_rq);
+
 /**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:         queue
index 306759b..001ab18 100644 (file)
@@ -180,26 +180,36 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
        return queue_var_show(max_hw_sectors_kb, (page));
 }
 
-static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
-{
-       return queue_var_show(!blk_queue_nonrot(q), page);
+#define QUEUE_SYSFS_BIT_FNS(name, flag, neg)                           \
+static ssize_t                                                         \
+queue_show_##name(struct request_queue *q, char *page)                 \
+{                                                                      \
+       int bit;                                                        \
+       bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags);             \
+       return queue_var_show(neg ? !bit : bit, page);                  \
+}                                                                      \
+static ssize_t                                                         \
+queue_store_##name(struct request_queue *q, const char *page, size_t count) \
+{                                                                      \
+       unsigned long val;                                              \
+       ssize_t ret;                                                    \
+       ret = queue_var_store(&val, page, count);                       \
+       if (neg)                                                        \
+               val = !val;                                             \
+                                                                       \
+       spin_lock_irq(q->queue_lock);                                   \
+       if (val)                                                        \
+               queue_flag_set(QUEUE_FLAG_##flag, q);                   \
+       else                                                            \
+               queue_flag_clear(QUEUE_FLAG_##flag, q);                 \
+       spin_unlock_irq(q->queue_lock);                                 \
+       return ret;                                                     \
 }
 
-static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
-                                 size_t count)
-{
-       unsigned long nm;
-       ssize_t ret = queue_var_store(&nm, page, count);
-
-       spin_lock_irq(q->queue_lock);
-       if (nm)
-               queue_flag_clear(QUEUE_FLAG_NONROT, q);
-       else
-               queue_flag_set(QUEUE_FLAG_NONROT, q);
-       spin_unlock_irq(q->queue_lock);
-
-       return ret;
-}
+QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
+QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
+QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
+#undef QUEUE_SYSFS_BIT_FNS
 
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
@@ -250,27 +260,6 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
        return ret;
 }
 
-static ssize_t queue_iostats_show(struct request_queue *q, char *page)
-{
-       return queue_var_show(blk_queue_io_stat(q), page);
-}
-
-static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
-                                  size_t count)
-{
-       unsigned long stats;
-       ssize_t ret = queue_var_store(&stats, page, count);
-
-       spin_lock_irq(q->queue_lock);
-       if (stats)
-               queue_flag_set(QUEUE_FLAG_IO_STAT, q);
-       else
-               queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-       spin_unlock_irq(q->queue_lock);
-
-       return ret;
-}
-
 static struct queue_sysfs_entry queue_requests_entry = {
        .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
        .show = queue_requests_show,
@@ -352,8 +341,8 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
        .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
-       .show = queue_nonrot_show,
-       .store = queue_nonrot_store,
+       .show = queue_show_nonrot,
+       .store = queue_store_nonrot,
 };
 
 static struct queue_sysfs_entry queue_nomerges_entry = {
@@ -370,8 +359,14 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
 
 static struct queue_sysfs_entry queue_iostats_entry = {
        .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
-       .show = queue_iostats_show,
-       .store = queue_iostats_store,
+       .show = queue_show_iostats,
+       .store = queue_store_iostats,
+};
+
+static struct queue_sysfs_entry queue_random_entry = {
+       .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+       .show = queue_show_random,
+       .store = queue_store_random,
 };
 
 static struct attribute *default_attrs[] = {
@@ -394,6 +389,7 @@ static struct attribute *default_attrs[] = {
        &queue_nomerges_entry.attr,
        &queue_rq_affinity_entry.attr,
        &queue_iostats_entry.attr,
+       &queue_random_entry.attr,
        NULL,
 };
 
index 5ee3d7e..6e7dc87 100644 (file)
@@ -161,8 +161,10 @@ static inline int blk_cpu_to_group(int cpu)
  */
 static inline int blk_do_io_stat(struct request *rq)
 {
-       return rq->rq_disk && blk_rq_io_stat(rq) &&
-              (blk_fs_request(rq) || blk_discard_rq(rq));
+       return rq->rq_disk &&
+              (rq->cmd_flags & REQ_IO_STAT) &&
+              (rq->cmd_type == REQ_TYPE_FS ||
+               (rq->cmd_flags & REQ_DISCARD));
 }
 
 #endif
index 7982b83..eb4086f 100644 (file)
@@ -458,7 +458,7 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
  */
 static inline bool cfq_bio_sync(struct bio *bio)
 {
-       return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
+       return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC);
 }
 
 /*
@@ -646,9 +646,10 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
                return rq1;
        else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
                return rq2;
-       if (rq_is_meta(rq1) && !rq_is_meta(rq2))
+       if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
                return rq1;
-       else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
+       else if ((rq2->cmd_flags & REQ_META) &&
+                !(rq1->cmd_flags & REQ_META))
                return rq2;
 
        s1 = blk_rq_pos(rq1);
@@ -1484,7 +1485,7 @@ static void cfq_remove_request(struct request *rq)
        cfqq->cfqd->rq_queued--;
        cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                        rq_data_dir(rq), rq_is_sync(rq));
-       if (rq_is_meta(rq)) {
+       if (rq->cmd_flags & REQ_META) {
                WARN_ON(!cfqq->meta_pending);
                cfqq->meta_pending--;
        }
@@ -3176,7 +3177,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         * So both queues are sync. Let the new request get disk time if
         * it's a metadata request and the current queue is doing regular IO.
         */
-       if (rq_is_meta(rq) && !cfqq->meta_pending)
+       if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
                return true;
 
        /*
@@ -3230,7 +3231,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct cfq_io_context *cic = RQ_CIC(rq);
 
        cfqd->rq_queued++;
-       if (rq_is_meta(rq))
+       if (rq->cmd_flags & REQ_META)
                cfqq->meta_pending++;
 
        cfq_update_io_thinktime(cfqd, cic);
@@ -3365,7 +3366,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        unsigned long now;
 
        now = jiffies;
-       cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq));
+       cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
+                    !!(rq->cmd_flags & REQ_NOIDLE));
 
        cfq_update_hw_tag(cfqd);
 
@@ -3419,11 +3421,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                        cfq_slice_expired(cfqd, 1);
                else if (sync && cfqq_empty &&
                         !cfq_close_cooperator(cfqd, cfqq)) {
-                       cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
+                       cfqd->noidle_tree_requires_idle |=
+                               !(rq->cmd_flags & REQ_NOIDLE);
                        /*
                         * Idling is enabled for SYNC_WORKLOAD.
                         * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
-                        * only if we processed at least one !rq_noidle request
+                        * only if we processed at least one !REQ_NOIDLE request
                         */
                        if (cfqd->serving_type == SYNC_WORKLOAD
                            || cfqd->noidle_tree_requires_idle
index f26051f..d530856 100644 (file)
@@ -535,56 +535,6 @@ out:
        return err;
 }
 
-struct compat_blk_user_trace_setup {
-       char name[32];
-       u16 act_mask;
-       u32 buf_size;
-       u32 buf_nr;
-       compat_u64 start_lba;
-       compat_u64 end_lba;
-       u32 pid;
-};
-#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup)
-
-static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
-{
-       struct blk_user_trace_setup buts;
-       struct compat_blk_user_trace_setup cbuts;
-       struct request_queue *q;
-       char b[BDEVNAME_SIZE];
-       int ret;
-
-       q = bdev_get_queue(bdev);
-       if (!q)
-               return -ENXIO;
-
-       if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
-               return -EFAULT;
-
-       bdevname(bdev, b);
-
-       buts = (struct blk_user_trace_setup) {
-               .act_mask = cbuts.act_mask,
-               .buf_size = cbuts.buf_size,
-               .buf_nr = cbuts.buf_nr,
-               .start_lba = cbuts.start_lba,
-               .end_lba = cbuts.end_lba,
-               .pid = cbuts.pid,
-       };
-       memcpy(&buts.name, &cbuts.name, 32);
-
-       mutex_lock(&bdev->bd_mutex);
-       ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
-       mutex_unlock(&bdev->bd_mutex);
-       if (ret)
-               return ret;
-
-       if (copy_to_user(arg, &buts.name, 32))
-               return -EFAULT;
-
-       return 0;
-}
-
 static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned cmd, unsigned long arg)
 {
@@ -802,16 +752,10 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                return compat_put_u64(arg, bdev->bd_inode->i_size);
 
        case BLKTRACESETUP32:
-               lock_kernel();
-               ret = compat_blk_trace_setup(bdev, compat_ptr(arg));
-               unlock_kernel();
-               return ret;
        case BLKTRACESTART: /* compatible */
        case BLKTRACESTOP:  /* compatible */
        case BLKTRACETEARDOWN: /* compatible */
-               lock_kernel();
                ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
-               unlock_kernel();
                return ret;
        default:
                if (disk->fops->compat_ioctl)
index 923a913..816a7c8 100644 (file)
@@ -79,8 +79,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
        /*
         * Don't merge file system requests and discard requests
         */
-       if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
-           bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
+       if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
                return 0;
 
        /*
@@ -428,7 +427,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);
 
-               if (blk_discard_rq(rq) != blk_discard_rq(pos))
+               if ((rq->cmd_flags & REQ_DISCARD) !=
+                   (pos->cmd_flags & REQ_DISCARD))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
@@ -558,7 +558,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
-               if (blk_sorted_rq(rq))
+               if (rq->cmd_flags & REQ_SORTED)
                        elv_deactivate_rq(q, rq);
        }
 
@@ -644,7 +644,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
                break;
 
        case ELEVATOR_INSERT_SORT:
-               BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
+               BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
+                      !(rq->cmd_flags & REQ_DISCARD));
                rq->cmd_flags |= REQ_SORTED;
                q->nr_sorted++;
                if (rq_mergeable(rq)) {
@@ -716,7 +717,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
                /*
                 * toggle ordered color
                 */
-               if (blk_barrier_rq(rq))
+               if (rq->cmd_flags & REQ_HARDBARRIER)
                        q->ordcolor ^= 1;
 
                /*
@@ -729,7 +730,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
                 * this request is scheduling boundary, update
                 * end_sector
                 */
-               if (blk_fs_request(rq) || blk_discard_rq(rq)) {
+               if (rq->cmd_type == REQ_TYPE_FS ||
+                   (rq->cmd_flags & REQ_DISCARD)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
@@ -843,7 +845,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
-               if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+               if ((rq->cmd_flags & REQ_SORTED) &&
+                   e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }
 
index e8eb679..09fd7f1 100644 (file)
@@ -163,18 +163,10 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned cmd, unsigned long arg)
 {
        struct gendisk *disk = bdev->bd_disk;
-       int ret;
 
        if (disk->fops->ioctl)
                return disk->fops->ioctl(bdev, mode, cmd, arg);
 
-       if (disk->fops->locked_ioctl) {
-               lock_kernel();
-               ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg);
-               unlock_kernel();
-               return ret;
-       }
-
        return -ENOTTY;
 }
 /*
@@ -185,8 +177,7 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
 EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
 
 /*
- * always keep this in sync with compat_blkdev_ioctl() and
- * compat_blkdev_locked_ioctl()
+ * always keep this in sync with compat_blkdev_ioctl()
  */
 int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                        unsigned long arg)
@@ -206,10 +197,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                if (ret != -EINVAL && ret != -ENOTTY)
                        return ret;
 
-               lock_kernel();
                fsync_bdev(bdev);
                invalidate_bdev(bdev);
-               unlock_kernel();
                return 0;
 
        case BLKROSET:
@@ -221,9 +210,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                        return -EACCES;
                if (get_user(n, (int __user *)(arg)))
                        return -EFAULT;
-               lock_kernel();
                set_device_ro(bdev, n);
-               unlock_kernel();
                return 0;
 
        case BLKDISCARD: {
@@ -309,14 +296,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                        bd_release(bdev);
                return ret;
        case BLKPG:
-               lock_kernel();
                ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
-               unlock_kernel();
                break;
        case BLKRRPART:
-               lock_kernel();
                ret = blkdev_reread_part(bdev);
-               unlock_kernel();
                break;
        case BLKGETSIZE:
                size = bdev->bd_inode->i_size;
@@ -329,9 +312,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        case BLKTRACESTOP:
        case BLKTRACESETUP:
        case BLKTRACETEARDOWN:
-               lock_kernel();
                ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
-               unlock_kernel();
                break;
        default:
                ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
index d75c9c4..a89172c 100644 (file)
@@ -1111,10 +1111,10 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
  */
 static int atapi_drain_needed(struct request *rq)
 {
-       if (likely(!blk_pc_request(rq)))
+       if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC))
                return 0;
 
-       if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_RW))
+       if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_WRITE))
                return 0;
 
        return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
index c5f22bb..4e2c367 100644 (file)
@@ -79,23 +79,28 @@ static int DAC960_open(struct block_device *bdev, fmode_t mode)
        struct gendisk *disk = bdev->bd_disk;
        DAC960_Controller_T *p = disk->queue->queuedata;
        int drive_nr = (long)disk->private_data;
+       int ret = -ENXIO;
 
+       lock_kernel();
        if (p->FirmwareType == DAC960_V1_Controller) {
                if (p->V1.LogicalDriveInformation[drive_nr].
                    LogicalDriveState == DAC960_V1_LogicalDrive_Offline)
-                       return -ENXIO;
+                       goto out;
        } else {
                DAC960_V2_LogicalDeviceInfo_T *i =
                        p->V2.LogicalDeviceInformation[drive_nr];
                if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline)
-                       return -ENXIO;
+                       goto out;
        }
 
        check_disk_change(bdev);
 
        if (!get_capacity(p->disks[drive_nr]))
-               return -ENXIO;
-       return 0;
+               goto out;
+       ret = 0;
+out:
+       unlock_kernel();
+       return ret;
 }
 
 static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
index 832798a..76f114f 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/smp_lock.h>
 #include <linux/amifdreg.h>
 #include <linux/amifd.h>
 #include <linux/buffer_head.h>
@@ -1423,7 +1424,7 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
                    unsigned int cmd, unsigned long param)
 {
        struct amiga_floppy_struct *p = bdev->bd_disk->private_data;
@@ -1500,6 +1501,18 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode,
        return 0;
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long param)
+{
+       int ret;
+
+       lock_kernel();
+       ret = fd_locked_ioctl(bdev, mode, cmd, param);
+       unlock_kernel();
+
+       return ret;
+}
+
 static void fd_probe(int dev)
 {
        unsigned long code;
@@ -1542,10 +1555,13 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        int old_dev;
        unsigned long flags;
 
+       lock_kernel();
        old_dev = fd_device[drive];
 
-       if (fd_ref[drive] && old_dev != system)
+       if (fd_ref[drive] && old_dev != system) {
+               unlock_kernel();
                return -EBUSY;
+       }
 
        if (mode & (FMODE_READ|FMODE_WRITE)) {
                check_disk_change(bdev);
@@ -1558,8 +1574,10 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
                        fd_deselect (drive);
                        rel_fdc();
 
-                       if (wrprot)
+                       if (wrprot) {
+                               unlock_kernel();
                                return -EROFS;
+                       }
                }
        }
 
@@ -1576,6 +1594,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive,
               unit[drive].type->name, data_types[system].name);
 
+       unlock_kernel();
        return 0;
 }
 
@@ -1584,6 +1603,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
        struct amiga_floppy_struct *p = disk->private_data;
        int drive = p - unit;
 
+       lock_kernel();
        if (unit[drive].dirty == 1) {
                del_timer (flush_track_timer + drive);
                non_int_flush_track (drive);
@@ -1597,6 +1617,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 /* the mod_use counter is handled this way */
        floppy_off (drive | 0x40000000);
 #endif
+       unlock_kernel();
        return 0;
 }
 
@@ -1638,7 +1659,7 @@ static const struct block_device_operations floppy_fops = {
        .owner          = THIS_MODULE,
        .open           = floppy_open,
        .release        = floppy_release,
-       .locked_ioctl   = fd_ioctl,
+       .ioctl          = fd_ioctl,
        .getgeo         = fd_getgeo,
        .media_changed  = amiga_floppy_change,
 };
index 035cefe..a946929 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/genhd.h>
 #include <linux/netdevice.h>
+#include <linux/smp_lock.h>
 #include "aoe.h"
 
 static struct kmem_cache *buf_pool_cache;
@@ -124,13 +125,16 @@ aoeblk_open(struct block_device *bdev, fmode_t mode)
        struct aoedev *d = bdev->bd_disk->private_data;
        ulong flags;
 
+       lock_kernel();
        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_UP) {
                d->nopen++;
                spin_unlock_irqrestore(&d->lock, flags);
+               unlock_kernel();
                return 0;
        }
        spin_unlock_irqrestore(&d->lock, flags);
+       unlock_kernel();
        return -ENODEV;
 }
 
@@ -173,7 +177,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
                BUG();
                bio_endio(bio, -ENXIO);
                return 0;
-       } else if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+       } else if (bio->bi_rw & REQ_HARDBARRIER) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        } else if (bio->bi_io_vec == NULL) {
index e35cf59..aceb964 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 
 #include <asm/atafd.h>
 #include <asm/atafdreg.h>
@@ -359,7 +360,7 @@ static void finish_fdc( void );
 static void finish_fdc_done( int dummy );
 static void setup_req_params( int drive );
 static void redo_fd_request( void);
-static int fd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                      cmd, unsigned long param);
 static void fd_probe( int drive );
 static int fd_test_drive_present( int drive );
@@ -1480,7 +1481,7 @@ void do_fd_request(struct request_queue * q)
        atari_enable_irq( IRQ_MFP_FDC );
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
                    unsigned int cmd, unsigned long param)
 {
        struct gendisk *disk = bdev->bd_disk;
@@ -1665,6 +1666,17 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode,
        }
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long arg)
+{
+       int ret;
+
+       lock_kernel();
+       ret = fd_locked_ioctl(bdev, mode, cmd, arg);
+       unlock_kernel();
+
+       return ret;
+}
 
 /* Initialize the 'unit' variable for drive 'drive' */
 
@@ -1838,24 +1850,36 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        return 0;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+       int ret;
+
+       lock_kernel();
+       ret = floppy_open(bdev, mode);
+       unlock_kernel();
+
+       return ret;
+}
 
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
        struct atari_floppy_struct *p = disk->private_data;
+       lock_kernel();
        if (p->ref < 0)
                p->ref = 0;
        else if (!p->ref--) {
                printk(KERN_ERR "floppy_release with fd_ref == 0");
                p->ref = 0;
        }
+       unlock_kernel();
        return 0;
 }
 
 static const struct block_device_operations floppy_fops = {
        .owner          = THIS_MODULE,
-       .open           = floppy_open,
+       .open           = floppy_unlocked_open,
        .release        = floppy_release,
-       .locked_ioctl   = fd_ioctl,
+       .ioctl          = fd_ioctl,
        .media_changed  = check_floppy_change,
        .revalidate_disk= floppy_revalidate,
 };
index f1bf79d..1c7f637 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/highmem.h>
+#include <linux/smp_lock.h>
 #include <linux/radix-tree.h>
 #include <linux/buffer_head.h> /* invalidate_bh_lrus() */
 #include <linux/slab.h>
@@ -340,7 +341,7 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
                                                get_capacity(bdev->bd_disk))
                goto out;
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+       if (unlikely(bio->bi_rw & REQ_DISCARD)) {
                err = 0;
                discard_from_brd(brd, sector, bio->bi_size);
                goto out;
@@ -401,6 +402,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
         * ram device BLKFLSBUF has special semantics, we want to actually
         * release and destroy the ramdisk data.
         */
+       lock_kernel();
        mutex_lock(&bdev->bd_mutex);
        error = -EBUSY;
        if (bdev->bd_openers <= 1) {
@@ -417,13 +419,14 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
                error = 0;
        }
        mutex_unlock(&bdev->bd_mutex);
+       unlock_kernel();
 
        return error;
 }
 
 static const struct block_device_operations brd_fops = {
        .owner =                THIS_MODULE,
-       .locked_ioctl =         brd_ioctl,
+       .ioctl =                brd_ioctl,
 #ifdef CONFIG_BLK_DEV_XIP
        .direct_access =        brd_direct_access,
 #endif
@@ -479,7 +482,7 @@ static struct brd_device *brd_alloc(int i)
        if (!brd->brd_queue)
                goto out_free_dev;
        blk_queue_make_request(brd->brd_queue, brd_make_request);
-       blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
+       blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG);
        blk_queue_max_hw_sectors(brd->brd_queue, 1024);
        blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
index e1e7143..31064df 100644 (file)
 #include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
+#define DRIVER_NAME "HP CISS Driver (v 3.6.26)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26)
 
 /* Embedded module documentation macros - see modules.h */
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
-MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-                       " SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
-                       " Smart Array G2 Series SAS/SATA Controllers");
-MODULE_VERSION("3.6.20");
+MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
+MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
 
 static int cciss_allow_hpsa;
@@ -107,6 +105,11 @@ static const struct pci_device_id cciss_pci_device_id[] = {
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3250},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3251},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3252},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3253},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3254},
        {0,}
 };
 
@@ -146,6 +149,11 @@ static struct board_type products[] = {
        {0x3249103C, "Smart Array P812", &SA5_access},
        {0x324A103C, "Smart Array P712m", &SA5_access},
        {0x324B103C, "Smart Array P711m", &SA5_access},
+       {0x3250103C, "Smart Array", &SA5_access},
+       {0x3251103C, "Smart Array", &SA5_access},
+       {0x3252103C, "Smart Array", &SA5_access},
+       {0x3253103C, "Smart Array", &SA5_access},
+       {0x3254103C, "Smart Array", &SA5_access},
 };
 
 /* How long to wait (in milliseconds) for board to go into simple mode */
@@ -167,9 +175,13 @@ static DEFINE_MUTEX(scan_mutex);
 static LIST_HEAD(scan_q);
 
 static void do_cciss_request(struct request_queue *q);
-static irqreturn_t do_cciss_intr(int irq, void *dev_id);
+static irqreturn_t do_cciss_intx(int irq, void *dev_id);
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
 static int cciss_open(struct block_device *bdev, fmode_t mode);
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode);
 static int cciss_release(struct gendisk *disk, fmode_t mode);
+static int do_ioctl(struct block_device *bdev, fmode_t mode,
+                   unsigned int cmd, unsigned long arg);
 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                       unsigned int cmd, unsigned long arg);
 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -179,25 +191,23 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
 static int deregister_disk(ctlr_info_t *h, int drv_index,
                           int clear_all, int via_ioctl);
 
-static void cciss_read_capacity(int ctlr, int logvol,
+static void cciss_read_capacity(ctlr_info_t *h, int logvol,
                        sector_t *total_size, unsigned int *block_size);
-static void cciss_read_capacity_16(int ctlr, int logvol,
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
                        sector_t *total_size, unsigned int *block_size);
-static void cciss_geometry_inquiry(int ctlr, int logvol,
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
                        sector_t total_size,
                        unsigned int block_size, InquiryData_struct *inq_buff,
                                   drive_info_struct *drv);
-static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
-                                          __u32);
+static void __devinit cciss_interrupt_mode(ctlr_info_t *);
 static void start_io(ctlr_info_t *h);
-static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
                        __u8 page_code, unsigned char scsi3addr[],
                        int cmd_type);
 static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
        int attempt_retry);
 static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
-static void fail_all_cmds(unsigned long ctlr);
 static int add_to_scan_list(struct ctlr_info *h);
 static int scan_thread(void *data);
 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
@@ -205,11 +215,23 @@ static void cciss_hba_release(struct device *dev);
 static void cciss_device_release(struct device *dev);
 static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
 static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
+static inline u32 next_command(ctlr_info_t *h);
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+       void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+       u64 *cfg_offset);
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+       unsigned long *memory_bar);
+
+
+/* performant mode helper functions */
+static void  calc_bucket_map(int *bucket, int num_buckets, int nsgs,
+                               int *bucket_map);
+static void cciss_put_controller_into_performant_mode(ctlr_info_t *h);
 
 #ifdef CONFIG_PROC_FS
-static void cciss_procinit(int i);
+static void cciss_procinit(ctlr_info_t *h);
 #else
-static void cciss_procinit(int i)
+static void cciss_procinit(ctlr_info_t *h)
 {
 }
 #endif                         /* CONFIG_PROC_FS */
@@ -221,9 +243,9 @@ static int cciss_compat_ioctl(struct block_device *, fmode_t,
 
 static const struct block_device_operations cciss_fops = {
        .owner = THIS_MODULE,
-       .open = cciss_open,
+       .open = cciss_unlocked_open,
        .release = cciss_release,
-       .locked_ioctl = cciss_ioctl,
+       .ioctl = do_ioctl,
        .getgeo = cciss_getgeo,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = cciss_compat_ioctl,
@@ -231,6 +253,16 @@ static const struct block_device_operations cciss_fops = {
        .revalidate_disk = cciss_revalidate,
 };
 
+/* set_performant_mode: Modify the tag (c->busaddr) for cciss performant mode:
+ * set bit 0 for pull model, bits 3-1 for the block fetch register number
+ * (h->blockFetchTable[c->Header.SGList]); a no-op in any other transMethod.
+ */
+static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
+{
+       if (likely(h->transMethod == CFGTBL_Trans_Performant))
+               c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+}
+
 /*
  * Enqueuing and dequeuing functions for cmdlists.
  */
@@ -257,6 +289,18 @@ static inline void removeQ(CommandList_struct *c)
        hlist_del_init(&c->list);
 }
 
+static void enqueue_cmd_and_start_io(ctlr_info_t *h,
+       CommandList_struct *c)  /* queue c on h->reqQ, then call start_io(h) */
+{
+       unsigned long flags;    /* irq state saved/restored around h->lock */
+       set_performant_mode(h, c);      /* tag c->busaddr before locking */
+       spin_lock_irqsave(&h->lock, flags);
+       addQ(&h->reqQ, c);      /* queue insert, depth count and start_io() */
+       h->Qdepth++;            /* all happen with h->lock held */
+       start_io(h);
+       spin_unlock_irqrestore(&h->lock, flags);
+}
+
 static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
        int nr_cmds)
 {
@@ -366,32 +410,31 @@ static void cciss_seq_show_header(struct seq_file *seq)
                h->product_name,
                (unsigned long)h->board_id,
                h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
-               h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
+               h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
                h->num_luns,
                h->Qdepth, h->commands_outstanding,
                h->maxQsinceinit, h->max_outstanding, h->maxSG);
 
 #ifdef CONFIG_CISS_SCSI_TAPE
-       cciss_seq_tape_report(seq, h->ctlr);
+       cciss_seq_tape_report(seq, h);
 #endif /* CONFIG_CISS_SCSI_TAPE */
 }
 
 static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
 {
        ctlr_info_t *h = seq->private;
-       unsigned ctlr = h->ctlr;
        unsigned long flags;
 
        /* prevent displaying bogus info during configuration
         * or deconfiguration of a logical volume
         */
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring) {
-               spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return ERR_PTR(-EBUSY);
        }
        h->busy_configuring = 1;
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        if (*pos == 0)
                cciss_seq_show_header(seq);
@@ -499,7 +542,7 @@ cciss_proc_write(struct file *file, const char __user *buf,
                struct seq_file *seq = file->private_data;
                ctlr_info_t *h = seq->private;
 
-               err = cciss_engage_scsi(h->ctlr);
+               err = cciss_engage_scsi(h);
                if (err == 0)
                        err = length;
        } else
@@ -522,7 +565,7 @@ static const struct file_operations cciss_proc_fops = {
        .write   = cciss_proc_write,
 };
 
-static void __devinit cciss_procinit(int i)
+static void __devinit cciss_procinit(ctlr_info_t *h)
 {
        struct proc_dir_entry *pde;
 
@@ -530,9 +573,9 @@ static void __devinit cciss_procinit(int i)
                proc_cciss = proc_mkdir("driver/cciss", NULL);
        if (!proc_cciss)
                return;
-       pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
+       pde = proc_create_data(h->devname, S_IWUSR | S_IRUSR | S_IRGRP |
                                        S_IROTH, proc_cciss,
-                                       &cciss_proc_fops, hba[i]);
+                                       &cciss_proc_fops, h);
 }
 #endif                         /* CONFIG_PROC_FS */
 
@@ -565,12 +608,12 @@ static ssize_t dev_show_unique_id(struct device *dev,
        unsigned long flags;
        int ret = 0;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring)
                ret = -EBUSY;
        else
                memcpy(sn, drv->serial_no, sizeof(sn));
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        if (ret)
                return ret;
@@ -595,12 +638,12 @@ static ssize_t dev_show_vendor(struct device *dev,
        unsigned long flags;
        int ret = 0;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring)
                ret = -EBUSY;
        else
                memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        if (ret)
                return ret;
@@ -619,12 +662,12 @@ static ssize_t dev_show_model(struct device *dev,
        unsigned long flags;
        int ret = 0;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring)
                ret = -EBUSY;
        else
                memcpy(model, drv->model, MODEL_LEN + 1);
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        if (ret)
                return ret;
@@ -643,12 +686,12 @@ static ssize_t dev_show_rev(struct device *dev,
        unsigned long flags;
        int ret = 0;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring)
                ret = -EBUSY;
        else
                memcpy(rev, drv->rev, REV_LEN + 1);
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        if (ret)
                return ret;
@@ -665,17 +708,17 @@ static ssize_t cciss_show_lunid(struct device *dev,
        unsigned long flags;
        unsigned char lunid[8];
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring) {
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return -EBUSY;
        }
        if (!drv->heads) {
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return -ENOTTY;
        }
        memcpy(lunid, drv->LunID, sizeof(lunid));
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
        return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                lunid[0], lunid[1], lunid[2], lunid[3],
                lunid[4], lunid[5], lunid[6], lunid[7]);
@@ -690,13 +733,13 @@ static ssize_t cciss_show_raid_level(struct device *dev,
        int raid;
        unsigned long flags;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring) {
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return -EBUSY;
        }
        raid = drv->raid_level;
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
        if (raid < 0 || raid > RAID_UNKNOWN)
                raid = RAID_UNKNOWN;
 
@@ -713,13 +756,13 @@ static ssize_t cciss_show_usage_count(struct device *dev,
        unsigned long flags;
        int count;
 
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring) {
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return -EBUSY;
        }
        count = drv->usage_count;
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
        return snprintf(buf, 20, "%d\n", count);
 }
 static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
@@ -864,60 +907,70 @@ static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index,
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
- * which ones are free or in use.  For operations that can wait for kmalloc
- * to possible sleep, this routine can be called with get_from_pool set to 0.
- * cmd_free() MUST be called with a got_from_pool set to 0 if cmd_alloc was.
+ * which ones are free or in use.
  */
-static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
+static CommandList_struct *cmd_alloc(ctlr_info_t *h)
 {
        CommandList_struct *c;
        int i;
        u64bit temp64;
        dma_addr_t cmd_dma_handle, err_dma_handle;
 
-       if (!get_from_pool) {
-               c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
-                       sizeof(CommandList_struct), &cmd_dma_handle);
-               if (c == NULL)
+       do {
+               i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
+               if (i == h->nr_cmds)
                        return NULL;
-               memset(c, 0, sizeof(CommandList_struct));
+       } while (test_and_set_bit(i & (BITS_PER_LONG - 1),
+                 h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
+       c = h->cmd_pool + i;
+       memset(c, 0, sizeof(CommandList_struct));
+       cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
+       c->err_info = h->errinfo_pool + i;
+       memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+       err_dma_handle = h->errinfo_pool_dhandle
+           + i * sizeof(ErrorInfo_struct);
+       h->nr_allocs++;
 
-               c->cmdindex = -1;
+       c->cmdindex = i;
 
-               c->err_info = (ErrorInfo_struct *)
-                   pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
-                           &err_dma_handle);
+       INIT_HLIST_NODE(&c->list);
+       c->busaddr = (__u32) cmd_dma_handle;
+       temp64.val = (__u64) err_dma_handle;
+       c->ErrDesc.Addr.lower = temp64.val32.lower;
+       c->ErrDesc.Addr.upper = temp64.val32.upper;
+       c->ErrDesc.Len = sizeof(ErrorInfo_struct);
 
-               if (c->err_info == NULL) {
-                       pci_free_consistent(h->pdev,
-                               sizeof(CommandList_struct), c, cmd_dma_handle);
-                       return NULL;
-               }
-               memset(c->err_info, 0, sizeof(ErrorInfo_struct));
-       } else {                /* get it out of the controllers pool */
-
-               do {
-                       i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
-                       if (i == h->nr_cmds)
-                               return NULL;
-               } while (test_and_set_bit
-                        (i & (BITS_PER_LONG - 1),
-                         h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
-#ifdef CCISS_DEBUG
-               printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
-#endif
-               c = h->cmd_pool + i;
-               memset(c, 0, sizeof(CommandList_struct));
-               cmd_dma_handle = h->cmd_pool_dhandle
-                   + i * sizeof(CommandList_struct);
-               c->err_info = h->errinfo_pool + i;
-               memset(c->err_info, 0, sizeof(ErrorInfo_struct));
-               err_dma_handle = h->errinfo_pool_dhandle
-                   + i * sizeof(ErrorInfo_struct);
-               h->nr_allocs++;
+       c->ctlr = h->ctlr;
+       return c;
+}
 
-               c->cmdindex = i;
+/* allocate a command using pci_alloc_consistent, used for ioctls,
+ * etc., not for the main i/o path.
+ */
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
+{
+       CommandList_struct *c;
+       u64bit temp64;
+       dma_addr_t cmd_dma_handle, err_dma_handle;
+
+       c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
+               sizeof(CommandList_struct), &cmd_dma_handle);
+       if (c == NULL)
+               return NULL;
+       memset(c, 0, sizeof(CommandList_struct));
+
+       c->cmdindex = -1;
+
+       c->err_info = (ErrorInfo_struct *)
+           pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
+                   &err_dma_handle);
+
+       if (c->err_info == NULL) {
+               pci_free_consistent(h->pdev,
+                       sizeof(CommandList_struct), c, cmd_dma_handle);
+               return NULL;
        }
+       memset(c->err_info, 0, sizeof(ErrorInfo_struct));
 
        INIT_HLIST_NODE(&c->list);
        c->busaddr = (__u32) cmd_dma_handle;
@@ -930,27 +983,26 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
        return c;
 }
 
-/*
- * Frees a command block that was previously allocated with cmd_alloc().
- */
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
        int i;
+
+       i = c - h->cmd_pool;
+       clear_bit(i & (BITS_PER_LONG - 1),
+                 h->cmd_pool_bits + (i / BITS_PER_LONG));
+       h->nr_frees++;
+}
+
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
+{
        u64bit temp64;
 
-       if (!got_from_pool) {
-               temp64.val32.lower = c->ErrDesc.Addr.lower;
-               temp64.val32.upper = c->ErrDesc.Addr.upper;
-               pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
-                                   c->err_info, (dma_addr_t) temp64.val);
-               pci_free_consistent(h->pdev, sizeof(CommandList_struct),
-                                   c, (dma_addr_t) c->busaddr);
-       } else {
-               i = c - h->cmd_pool;
-               clear_bit(i & (BITS_PER_LONG - 1),
-                         h->cmd_pool_bits + (i / BITS_PER_LONG));
-               h->nr_frees++;
-       }
+       temp64.val32.lower = c->ErrDesc.Addr.lower;
+       temp64.val32.upper = c->ErrDesc.Addr.upper;
+       pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
+                           c->err_info, (dma_addr_t) temp64.val);
+       pci_free_consistent(h->pdev, sizeof(CommandList_struct),
+                           c, (dma_addr_t) c->busaddr);
 }
 
 static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -968,13 +1020,10 @@ static inline drive_info_struct *get_drv(struct gendisk *disk)
  */
 static int cciss_open(struct block_device *bdev, fmode_t mode)
 {
-       ctlr_info_t *host = get_host(bdev->bd_disk);
+       ctlr_info_t *h = get_host(bdev->bd_disk);
        drive_info_struct *drv = get_drv(bdev->bd_disk);
 
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
-#endif                         /* CCISS_DEBUG */
-
+       dev_dbg(&h->pdev->dev, "cciss_open %s\n", bdev->bd_disk->disk_name);
        if (drv->busy_configuring)
                return -EBUSY;
        /*
@@ -1000,29 +1049,39 @@ static int cciss_open(struct block_device *bdev, fmode_t mode)
                        return -EPERM;
        }
        drv->usage_count++;
-       host->usage_count++;
+       h->usage_count++;
        return 0;
 }
 
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode)
+{      /* installed as cciss_fops.open; wraps cciss_open() */
+       int ret;        /* cciss_open() result, returned after unlocking */
+
+       lock_kernel();  /* cciss_open() runs between lock/unlock_kernel() */
+       ret = cciss_open(bdev, mode);
+       unlock_kernel();
+
+       return ret;
+}
+
 /*
  * Close.  Sync first.
  */
 static int cciss_release(struct gendisk *disk, fmode_t mode)
 {
-       ctlr_info_t *host = get_host(disk);
-       drive_info_struct *drv = get_drv(disk);
-
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
-#endif                         /* CCISS_DEBUG */
+       ctlr_info_t *h;
+       drive_info_struct *drv;
 
+       lock_kernel();
+       h = get_host(disk);
+       drv = get_drv(disk);
+       dev_dbg(&h->pdev->dev, "cciss_release %s\n", disk->disk_name);
        drv->usage_count--;
-       host->usage_count--;
+       h->usage_count--;
+       unlock_kernel();
        return 0;
 }
 
-#ifdef CONFIG_COMPAT
-
 static int do_ioctl(struct block_device *bdev, fmode_t mode,
                    unsigned cmd, unsigned long arg)
 {
@@ -1033,6 +1092,8 @@ static int do_ioctl(struct block_device *bdev, fmode_t mode,
        return ret;
 }
 
+#ifdef CONFIG_COMPAT
+
 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
                                  unsigned cmd, unsigned long arg);
 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
@@ -1163,11 +1224,11 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
 {
        if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
                        c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
-               (void)check_for_unit_attention(host, c);
+               (void)check_for_unit_attention(h, c);
 }
 /*
  * ioctl
@@ -1176,15 +1237,12 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                       unsigned int cmd, unsigned long arg)
 {
        struct gendisk *disk = bdev->bd_disk;
-       ctlr_info_t *host = get_host(disk);
+       ctlr_info_t *h = get_host(disk);
        drive_info_struct *drv = get_drv(disk);
-       int ctlr = host->ctlr;
        void __user *argp = (void __user *)arg;
 
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
-#endif                         /* CCISS_DEBUG */
-
+       dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
+               cmd, arg);
        switch (cmd) {
        case CCISS_GETPCIINFO:
                {
@@ -1192,10 +1250,10 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 
                        if (!arg)
                                return -EINVAL;
-                       pciinfo.domain = pci_domain_nr(host->pdev->bus);
-                       pciinfo.bus = host->pdev->bus->number;
-                       pciinfo.dev_fn = host->pdev->devfn;
-                       pciinfo.board_id = host->board_id;
+                       pciinfo.domain = pci_domain_nr(h->pdev->bus);
+                       pciinfo.bus = h->pdev->bus->number;
+                       pciinfo.dev_fn = h->pdev->devfn;
+                       pciinfo.board_id = h->board_id;
                        if (copy_to_user
                            (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
                                return -EFAULT;
@@ -1207,9 +1265,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        if (!arg)
                                return -EINVAL;
                        intinfo.delay =
-                           readl(&host->cfgtable->HostWrite.CoalIntDelay);
+                           readl(&h->cfgtable->HostWrite.CoalIntDelay);
                        intinfo.count =
-                           readl(&host->cfgtable->HostWrite.CoalIntCount);
+                           readl(&h->cfgtable->HostWrite.CoalIntCount);
                        if (copy_to_user
                            (argp, &intinfo, sizeof(cciss_coalint_struct)))
                                return -EFAULT;
@@ -1229,26 +1287,23 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                            (&intinfo, argp, sizeof(cciss_coalint_struct)))
                                return -EFAULT;
                        if ((intinfo.delay == 0) && (intinfo.count == 0))
-                       {
-//                      printk("cciss_ioctl: delay and count cannot be 0\n");
                                return -EINVAL;
-                       }
-                       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+                       spin_lock_irqsave(&h->lock, flags);
                        /* Update the field, and then ring the doorbell */
                        writel(intinfo.delay,
-                              &(host->cfgtable->HostWrite.CoalIntDelay));
+                              &(h->cfgtable->HostWrite.CoalIntDelay));
                        writel(intinfo.count,
-                              &(host->cfgtable->HostWrite.CoalIntCount));
-                       writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
+                              &(h->cfgtable->HostWrite.CoalIntCount));
+                       writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 
                        for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-                               if (!(readl(host->vaddr + SA5_DOORBELL)
+                               if (!(readl(h->vaddr + SA5_DOORBELL)
                                      & CFGTBL_ChangeReq))
                                        break;
                                /* delay and try again */
                                udelay(1000);
                        }
-                       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+                       spin_unlock_irqrestore(&h->lock, flags);
                        if (i >= MAX_IOCTL_CONFIG_WAIT)
                                return -EAGAIN;
                        return 0;
@@ -1262,7 +1317,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                                return -EINVAL;
                        for (i = 0; i < 16; i++)
                                NodeName[i] =
-                                   readb(&host->cfgtable->ServerName[i]);
+                                   readb(&h->cfgtable->ServerName[i]);
                        if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
                                return -EFAULT;
                        return 0;
@@ -1282,23 +1337,23 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                            (NodeName, argp, sizeof(NodeName_type)))
                                return -EFAULT;
 
-                       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+                       spin_lock_irqsave(&h->lock, flags);
 
                        /* Update the field, and then ring the doorbell */
                        for (i = 0; i < 16; i++)
                                writeb(NodeName[i],
-                                      &host->cfgtable->ServerName[i]);
+                                      &h->cfgtable->ServerName[i]);
 
-                       writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
+                       writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 
                        for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-                               if (!(readl(host->vaddr + SA5_DOORBELL)
+                               if (!(readl(h->vaddr + SA5_DOORBELL)
                                      & CFGTBL_ChangeReq))
                                        break;
                                /* delay and try again */
                                udelay(1000);
                        }
-                       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+                       spin_unlock_irqrestore(&h->lock, flags);
                        if (i >= MAX_IOCTL_CONFIG_WAIT)
                                return -EAGAIN;
                        return 0;
@@ -1310,7 +1365,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 
                        if (!arg)
                                return -EINVAL;
-                       heartbeat = readl(&host->cfgtable->HeartBeat);
+                       heartbeat = readl(&h->cfgtable->HeartBeat);
                        if (copy_to_user
                            (argp, &heartbeat, sizeof(Heartbeat_type)))
                                return -EFAULT;
@@ -1322,7 +1377,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 
                        if (!arg)
                                return -EINVAL;
-                       BusTypes = readl(&host->cfgtable->BusTypes);
+                       BusTypes = readl(&h->cfgtable->BusTypes);
                        if (copy_to_user
                            (argp, &BusTypes, sizeof(BusTypes_type)))
                                return -EFAULT;
@@ -1334,7 +1389,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 
                        if (!arg)
                                return -EINVAL;
-                       memcpy(firmware, host->firm_ver, 4);
+                       memcpy(firmware, h->firm_ver, 4);
 
                        if (copy_to_user
                            (argp, firmware, sizeof(FirmwareVer_type)))
@@ -1357,7 +1412,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
        case CCISS_DEREGDISK:
        case CCISS_REGNEWD:
        case CCISS_REVALIDVOLS:
-               return rebuild_lun_table(host, 0, 1);
+               return rebuild_lun_table(h, 0, 1);
 
        case CCISS_GETLUNINFO:{
                        LogvolInfo_struct luninfo;
@@ -1377,7 +1432,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        CommandList_struct *c;
                        char *buff = NULL;
                        u64bit temp64;
-                       unsigned long flags;
                        DECLARE_COMPLETION_ONSTACK(wait);
 
                        if (!arg)
@@ -1413,7 +1467,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        } else {
                                memset(buff, 0, iocommand.buf_size);
                        }
-                       if ((c = cmd_alloc(host, 0)) == NULL) {
+                       c = cmd_special_alloc(h);
+                       if (!c) {
                                kfree(buff);
                                return -ENOMEM;
                        }
@@ -1439,7 +1494,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 
                        /* Fill in the scatter gather information */
                        if (iocommand.buf_size > 0) {
-                               temp64.val = pci_map_single(host->pdev, buff,
+                               temp64.val = pci_map_single(h->pdev, buff,
                                        iocommand.buf_size,
                                        PCI_DMA_BIDIRECTIONAL);
                                c->SG[0].Addr.lower = temp64.val32.lower;
@@ -1449,30 +1504,24 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        }
                        c->waiting = &wait;
 
-                       /* Put the request on the tail of the request queue */
-                       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-                       addQ(&host->reqQ, c);
-                       host->Qdepth++;
-                       start_io(host);
-                       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-
+                       enqueue_cmd_and_start_io(h, c);
                        wait_for_completion(&wait);
 
                        /* unlock the buffers from DMA */
                        temp64.val32.lower = c->SG[0].Addr.lower;
                        temp64.val32.upper = c->SG[0].Addr.upper;
-                       pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
+                       pci_unmap_single(h->pdev, (dma_addr_t) temp64.val,
                                         iocommand.buf_size,
                                         PCI_DMA_BIDIRECTIONAL);
 
-                       check_ioctl_unit_attention(host, c);
+                       check_ioctl_unit_attention(h, c);
 
                        /* Copy the error information out */
                        iocommand.error_info = *(c->err_info);
                        if (copy_to_user
                            (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
                                kfree(buff);
-                               cmd_free(host, c, 0);
+                               cmd_special_free(h, c);
                                return -EFAULT;
                        }
 
@@ -1481,12 +1530,12 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                                if (copy_to_user
                                    (iocommand.buf, buff, iocommand.buf_size)) {
                                        kfree(buff);
-                                       cmd_free(host, c, 0);
+                                       cmd_special_free(h, c);
                                        return -EFAULT;
                                }
                        }
                        kfree(buff);
-                       cmd_free(host, c, 0);
+                       cmd_special_free(h, c);
                        return 0;
                }
        case CCISS_BIG_PASSTHRU:{
@@ -1495,7 +1544,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned char **buff = NULL;
                        int *buff_size = NULL;
                        u64bit temp64;
-                       unsigned long flags;
                        BYTE sg_used = 0;
                        int status = 0;
                        int i;
@@ -1569,7 +1617,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                                data_ptr += sz;
                                sg_used++;
                        }
-                       if ((c = cmd_alloc(host, 0)) == NULL) {
+                       c = cmd_special_alloc(h);
+                       if (!c) {
                                status = -ENOMEM;
                                goto cleanup1;
                        }
@@ -1590,7 +1639,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                        if (ioc->buf_size > 0) {
                                for (i = 0; i < sg_used; i++) {
                                        temp64.val =
-                                           pci_map_single(host->pdev, buff[i],
+                                           pci_map_single(h->pdev, buff[i],
                                                    buff_size[i],
                                                    PCI_DMA_BIDIRECTIONAL);
                                        c->SG[i].Addr.lower =
@@ -1602,26 +1651,21 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                                }
                        }
                        c->waiting = &wait;
-                       /* Put the request on the tail of the request queue */
-                       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-                       addQ(&host->reqQ, c);
-                       host->Qdepth++;
-                       start_io(host);
-                       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+                       enqueue_cmd_and_start_io(h, c);
                        wait_for_completion(&wait);
                        /* unlock the buffers from DMA */
                        for (i = 0; i < sg_used; i++) {
                                temp64.val32.lower = c->SG[i].Addr.lower;
                                temp64.val32.upper = c->SG[i].Addr.upper;
-                               pci_unmap_single(host->pdev,
+                               pci_unmap_single(h->pdev,
                                        (dma_addr_t) temp64.val, buff_size[i],
                                        PCI_DMA_BIDIRECTIONAL);
                        }
-                       check_ioctl_unit_attention(host, c);
+                       check_ioctl_unit_attention(h, c);
                        /* Copy the error information out */
                        ioc->error_info = *(c->err_info);
                        if (copy_to_user(argp, ioc, sizeof(*ioc))) {
-                               cmd_free(host, c, 0);
+                               cmd_special_free(h, c);
                                status = -EFAULT;
                                goto cleanup1;
                        }
@@ -1631,14 +1675,14 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
                                for (i = 0; i < sg_used; i++) {
                                        if (copy_to_user
                                            (ptr, buff[i], buff_size[i])) {
-                                               cmd_free(host, c, 0);
+                                               cmd_special_free(h, c);
                                                status = -EFAULT;
                                                goto cleanup1;
                                        }
                                        ptr += buff_size[i];
                                }
                        }
-                       cmd_free(host, c, 0);
+                       cmd_special_free(h, c);
                        status = 0;
                      cleanup1:
                        if (buff) {
@@ -1726,26 +1770,26 @@ static void cciss_check_queues(ctlr_info_t *h)
 
 static void cciss_softirq_done(struct request *rq)
 {
-       CommandList_struct *cmd = rq->completion_data;
-       ctlr_info_t *h = hba[cmd->ctlr];
-       SGDescriptor_struct *curr_sg = cmd->SG;
-       unsigned long flags;
+       CommandList_struct *c = rq->completion_data;
+       ctlr_info_t *h = hba[c->ctlr];
+       SGDescriptor_struct *curr_sg = c->SG;
        u64bit temp64;
+       unsigned long flags;
        int i, ddir;
        int sg_index = 0;
 
-       if (cmd->Request.Type.Direction == XFER_READ)
+       if (c->Request.Type.Direction == XFER_READ)
                ddir = PCI_DMA_FROMDEVICE;
        else
                ddir = PCI_DMA_TODEVICE;
 
        /* command did not need to be retried */
        /* unmap the DMA mapping for all the scatter gather elements */
-       for (i = 0; i < cmd->Header.SGList; i++) {
+       for (i = 0; i < c->Header.SGList; i++) {
                if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
-                       cciss_unmap_sg_chain_block(h, cmd);
+                       cciss_unmap_sg_chain_block(h, c);
                        /* Point to the next block */
-                       curr_sg = h->cmd_sg_list[cmd->cmdindex];
+                       curr_sg = h->cmd_sg_list[c->cmdindex];
                        sg_index = 0;
                }
                temp64.val32.lower = curr_sg[sg_index].Addr.lower;
@@ -1755,18 +1799,16 @@ static void cciss_softirq_done(struct request *rq)
                ++sg_index;
        }
 
-#ifdef CCISS_DEBUG
-       printk("Done with %p\n", rq);
-#endif                         /* CCISS_DEBUG */
+       dev_dbg(&h->pdev->dev, "Done with %p\n", rq);
 
        /* set the residual count for pc requests */
-       if (blk_pc_request(rq))
-               rq->resid_len = cmd->err_info->ResidualCnt;
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
+               rq->resid_len = c->err_info->ResidualCnt;
 
        blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
        spin_lock_irqsave(&h->lock, flags);
-       cmd_free(h, cmd, 1);
+       cmd_free(h, c);
        cciss_check_queues(h);
        spin_unlock_irqrestore(&h->lock, flags);
 }
@@ -1782,7 +1824,7 @@ static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
  * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
  * they cannot be read.
  */
-static void cciss_get_device_descr(int ctlr, int logvol,
+static void cciss_get_device_descr(ctlr_info_t *h, int logvol,
                                   char *vendor, char *model, char *rev)
 {
        int rc;
@@ -1797,8 +1839,8 @@ static void cciss_get_device_descr(int ctlr, int logvol,
        if (!inq_buf)
                return;
 
-       log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-       rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf, sizeof(*inq_buf), 0,
+       log_unit_to_scsi3addr(h, scsi3addr, logvol);
+       rc = sendcmd_withirq(h, CISS_INQUIRY, inq_buf, sizeof(*inq_buf), 0,
                        scsi3addr, TYPE_CMD);
        if (rc == IO_OK) {
                memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
@@ -1818,7 +1860,7 @@ static void cciss_get_device_descr(int ctlr, int logvol,
  * number cannot be had, for whatever reason, 16 bytes of 0xff
  * are returned instead.
  */
-static void cciss_get_serial_no(int ctlr, int logvol,
+static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
                                unsigned char *serial_no, int buflen)
 {
 #define PAGE_83_INQ_BYTES 64
@@ -1833,8 +1875,8 @@ static void cciss_get_serial_no(int ctlr, int logvol,
        if (!buf)
                return;
        memset(serial_no, 0, buflen);
-       log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-       rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
+       log_unit_to_scsi3addr(h, scsi3addr, logvol);
+       rc = sendcmd_withirq(h, CISS_INQUIRY, buf,
                PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
        if (rc == IO_OK)
                memcpy(serial_no, &buf[8], buflen);
@@ -1900,10 +1942,9 @@ init_queue_failure:
  * is also the controller node.  Any changes to disk 0 will show up on
  * the next reboot.
  */
-static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
-       int via_ioctl)
+static void cciss_update_drive_info(ctlr_info_t *h, int drv_index,
+       int first_time, int via_ioctl)
 {
-       ctlr_info_t *h = hba[ctlr];
        struct gendisk *disk;
        InquiryData_struct *inq_buff = NULL;
        unsigned int block_size;
@@ -1920,16 +1961,16 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
 
        /* testing to see if 16-byte CDBs are already being used */
        if (h->cciss_read == CCISS_READ_16) {
-               cciss_read_capacity_16(h->ctlr, drv_index,
+               cciss_read_capacity_16(h, drv_index,
                        &total_size, &block_size);
 
        } else {
-               cciss_read_capacity(ctlr, drv_index, &total_size, &block_size);
+               cciss_read_capacity(h, drv_index, &total_size, &block_size);
                /* if read_capacity returns all F's this volume is >2TB */
                /* in size so we switch to 16-byte CDB's for all */
                /* read/write ops */
                if (total_size == 0xFFFFFFFFULL) {
-                       cciss_read_capacity_16(ctlr, drv_index,
+                       cciss_read_capacity_16(h, drv_index,
                        &total_size, &block_size);
                        h->cciss_read = CCISS_READ_16;
                        h->cciss_write = CCISS_WRITE_16;
@@ -1939,14 +1980,14 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
                }
        }
 
-       cciss_geometry_inquiry(ctlr, drv_index, total_size, block_size,
+       cciss_geometry_inquiry(h, drv_index, total_size, block_size,
                               inq_buff, drvinfo);
        drvinfo->block_size = block_size;
        drvinfo->nr_blocks = total_size + 1;
 
-       cciss_get_device_descr(ctlr, drv_index, drvinfo->vendor,
+       cciss_get_device_descr(h, drv_index, drvinfo->vendor,
                                drvinfo->model, drvinfo->rev);
-       cciss_get_serial_no(ctlr, drv_index, drvinfo->serial_no,
+       cciss_get_serial_no(h, drv_index, drvinfo->serial_no,
                        sizeof(drvinfo->serial_no));
        /* Save the lunid in case we deregister the disk, below. */
        memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
@@ -1971,10 +2012,10 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
         * (unless it's the first disk (for the controller node).
         */
        if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
-               printk(KERN_WARNING "disk %d has changed.\n", drv_index);
-               spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+               dev_warn(&h->pdev->dev, "disk %d has changed.\n", drv_index);
+               spin_lock_irqsave(&h->lock, flags);
                h->drv[drv_index]->busy_configuring = 1;
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
 
                /* deregister_disk sets h->drv[drv_index]->queue = NULL
                 * which keeps the interrupt handler from starting
@@ -2024,8 +2065,8 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
                if (cciss_add_disk(h, disk, drv_index) != 0) {
                        cciss_free_gendisk(h, drv_index);
                        cciss_free_drive_info(h, drv_index);
-                       printk(KERN_WARNING "cciss:%d could not update "
-                               "disk %d\n", h->ctlr, drv_index);
+                       dev_warn(&h->pdev->dev, "could not update disk %d\n",
+                               drv_index);
                        --h->num_luns;
                }
        }
@@ -2035,7 +2076,7 @@ freeret:
        kfree(drvinfo);
        return;
 mem_msg:
-       printk(KERN_ERR "cciss: out of memory\n");
+       dev_err(&h->pdev->dev, "out of memory\n");
        goto freeret;
 }
 
@@ -2127,9 +2168,9 @@ static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[],
                h->gendisk[drv_index] =
                        alloc_disk(1 << NWD_SHIFT);
                if (!h->gendisk[drv_index]) {
-                       printk(KERN_ERR "cciss%d: could not "
-                               "allocate a new disk %d\n",
-                               h->ctlr, drv_index);
+                       dev_err(&h->pdev->dev,
+                               "could not allocate a new disk %d\n",
+                               drv_index);
                        goto err_free_drive_info;
                }
        }
@@ -2180,8 +2221,7 @@ static void cciss_add_controller_node(ctlr_info_t *h)
        cciss_free_gendisk(h, drv_index);
        cciss_free_drive_info(h, drv_index);
 error:
-       printk(KERN_WARNING "cciss%d: could not "
-               "add disk 0.\n", h->ctlr);
+       dev_warn(&h->pdev->dev, "could not add disk 0.\n");
        return;
 }
 
@@ -2196,7 +2236,6 @@ error:
 static int rebuild_lun_table(ctlr_info_t *h, int first_time,
        int via_ioctl)
 {
-       int ctlr = h->ctlr;
        int num_luns;
        ReportLunData_struct *ld_buff = NULL;
        int return_code;
@@ -2211,27 +2250,27 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time,
                return -EPERM;
 
        /* Set busy_configuring flag for this operation */
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        if (h->busy_configuring) {
-               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                return -EBUSY;
        }
        h->busy_configuring = 1;
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       spin_unlock_irqrestore(&h->lock, flags);
 
        ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
        if (ld_buff == NULL)
                goto mem_msg;
 
-       return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
+       return_code = sendcmd_withirq(h, CISS_REPORT_LOG, ld_buff,
                                      sizeof(ReportLunData_struct),
                                      0, CTLR_LUNID, TYPE_CMD);
 
        if (return_code == IO_OK)
                listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
        else {  /* reading number of logical volumes failed */
-               printk(KERN_WARNING "cciss: report logical volume"
-                      " command failed\n");
+               dev_warn(&h->pdev->dev,
+                       "report logical volume command failed\n");
                listlength = 0;
                goto freeret;
        }
@@ -2239,7 +2278,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time,
        num_luns = listlength / 8;      /* 8 bytes per entry */
        if (num_luns > CISS_MAX_LUN) {
                num_luns = CISS_MAX_LUN;
-               printk(KERN_WARNING "cciss: more luns configured"
+               dev_warn(&h->pdev->dev, "more luns configured"
                       " on controller than can be handled by"
                       " this driver.\n");
        }
@@ -2270,9 +2309,9 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time,
                }
                if (!drv_found) {
                        /* Deregister it from the OS, it's gone. */
-                       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+                       spin_lock_irqsave(&h->lock, flags);
                        h->drv[i]->busy_configuring = 1;
-                       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+                       spin_unlock_irqrestore(&h->lock, flags);
                        return_code = deregister_disk(h, i, 1, via_ioctl);
                        if (h->drv[i] != NULL)
                                h->drv[i]->busy_configuring = 0;
@@ -2311,8 +2350,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time,
                        if (drv_index == -1)
                                goto freeret;
                }
-               cciss_update_drive_info(ctlr, drv_index, first_time,
-                       via_ioctl);
+               cciss_update_drive_info(h, drv_index, first_time, via_ioctl);
        }               /* end for */
 
 freeret:
@@ -2324,7 +2362,7 @@ freeret:
         */
        return -1;
 mem_msg:
-       printk(KERN_ERR "cciss: out of memory\n");
+       dev_err(&h->pdev->dev, "out of memory\n");
        h->busy_configuring = 0;
        goto freeret;
 }
@@ -2444,11 +2482,10 @@ static int deregister_disk(ctlr_info_t *h, int drv_index,
        return 0;
 }
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
                size_t size, __u8 page_code, unsigned char *scsi3addr,
                int cmd_type)
 {
-       ctlr_info_t *h = hba[ctlr];
        u64bit buff_dma_handle;
        int status = IO_OK;
 
@@ -2532,8 +2569,7 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
                        c->Request.Timeout = 0;
                        break;
                default:
-                       printk(KERN_WARNING
-                              "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
+                       dev_warn(&h->pdev->dev, "Unknown Command 0x%c\n", cmd);
                        return IO_ERROR;
                }
        } else if (cmd_type == TYPE_MSG) {
@@ -2565,13 +2601,12 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
                        c->Request.CDB[0] = cmd;
                        break;
                default:
-                       printk(KERN_WARNING
-                              "cciss%d: unknown message type %d\n", ctlr, cmd);
+                       dev_warn(&h->pdev->dev,
+                               "unknown message type %d\n", cmd);
                        return IO_ERROR;
                }
        } else {
-               printk(KERN_WARNING
-                      "cciss%d: unknown command type %d\n", ctlr, cmd_type);
+               dev_warn(&h->pdev->dev, "unknown command type %d\n", cmd_type);
                return IO_ERROR;
        }
        /* Fill in the scatter gather information */
@@ -2599,15 +2634,14 @@ static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
                default:
                        if (check_for_unit_attention(h, c))
                                return IO_NEEDS_RETRY;
-                       printk(KERN_WARNING "cciss%d: cmd 0x%02x "
+                       dev_warn(&h->pdev->dev, "cmd 0x%02x "
                                "check condition, sense key = 0x%02x\n",
-                               h->ctlr, c->Request.CDB[0],
-                               c->err_info->SenseInfo[2]);
+                               c->Request.CDB[0], c->err_info->SenseInfo[2]);
                }
                break;
        default:
-               printk(KERN_WARNING "cciss%d: cmd 0x%02x"
-                       "scsi status = 0x%02x\n", h->ctlr,
+               dev_warn(&h->pdev->dev, "cmd 0x%02x"
+                       "scsi status = 0x%02x\n",
                        c->Request.CDB[0], c->err_info->ScsiStatus);
                break;
        }
@@ -2630,43 +2664,42 @@ static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
                /* expected for inquiry and report lun commands */
                break;
        case CMD_INVALID:
-               printk(KERN_WARNING "cciss: cmd 0x%02x is "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x is "
                       "reported invalid\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_PROTOCOL_ERR:
-               printk(KERN_WARNING "cciss: cmd 0x%02x has "
-                      "protocol error \n", c->Request.CDB[0]);
+               dev_warn(&h->pdev->dev, "cmd 0x%02x has "
+                      "protocol error\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_HARDWARE_ERR:
-               printk(KERN_WARNING "cciss: cmd 0x%02x had "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x had "
                       " hardware error\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_CONNECTION_LOST:
-               printk(KERN_WARNING "cciss: cmd 0x%02x had "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x had "
                       "connection lost\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_ABORTED:
-               printk(KERN_WARNING "cciss: cmd 0x%02x was "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x was "
                       "aborted\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_ABORT_FAILED:
-               printk(KERN_WARNING "cciss: cmd 0x%02x reports "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x reports "
                       "abort failed\n", c->Request.CDB[0]);
                return_status = IO_ERROR;
                break;
        case CMD_UNSOLICITED_ABORT:
-               printk(KERN_WARNING
-                      "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
+               dev_warn(&h->pdev->dev, "unsolicited abort 0x%02x\n",
                        c->Request.CDB[0]);
                return_status = IO_NEEDS_RETRY;
                break;
        default:
-               printk(KERN_WARNING "cciss: cmd 0x%02x returned "
+               dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
                       "unknown status %x\n", c->Request.CDB[0],
                       c->err_info->CommandStatus);
                return_status = IO_ERROR;
@@ -2679,17 +2712,11 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
 {
        DECLARE_COMPLETION_ONSTACK(wait);
        u64bit buff_dma_handle;
-       unsigned long flags;
        int return_status = IO_OK;
 
 resend_cmd2:
        c->waiting = &wait;
-       /* Put the request on the tail of the queue and send it */
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-       addQ(&h->reqQ, c);
-       h->Qdepth++;
-       start_io(h);
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       enqueue_cmd_and_start_io(h, c);
 
        wait_for_completion(&wait);
 
@@ -2700,7 +2727,7 @@ resend_cmd2:
 
        if (return_status == IO_NEEDS_RETRY &&
                c->retry_count < MAX_CMD_RETRIES) {
-               printk(KERN_WARNING "cciss%d: retrying 0x%02x\n", h->ctlr,
+               dev_warn(&h->pdev->dev, "retrying 0x%02x\n",
                        c->Request.CDB[0]);
                c->retry_count++;
                /* erase the old error information */
@@ -2719,27 +2746,26 @@ command_done:
        return return_status;
 }
 
-static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
                           __u8 page_code, unsigned char scsi3addr[],
                        int cmd_type)
 {
-       ctlr_info_t *h = hba[ctlr];
        CommandList_struct *c;
        int return_status;
 
-       c = cmd_alloc(h, 0);
+       c = cmd_special_alloc(h);
        if (!c)
                return -ENOMEM;
-       return_status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+       return_status = fill_cmd(h, c, cmd, buff, size, page_code,
                scsi3addr, cmd_type);
        if (return_status == IO_OK)
                return_status = sendcmd_withirq_core(h, c, 1);
 
-       cmd_free(h, c, 0);
+       cmd_special_free(h, c);
        return return_status;
 }
 
-static void cciss_geometry_inquiry(int ctlr, int logvol,
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
                                   sector_t total_size,
                                   unsigned int block_size,
                                   InquiryData_struct *inq_buff,
@@ -2750,13 +2776,13 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
        unsigned char scsi3addr[8];
 
        memset(inq_buff, 0, sizeof(InquiryData_struct));
-       log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-       return_code = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buff,
+       log_unit_to_scsi3addr(h, scsi3addr, logvol);
+       return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
                        sizeof(*inq_buff), 0xC1, scsi3addr, TYPE_CMD);
        if (return_code == IO_OK) {
                if (inq_buff->data_byte[8] == 0xFF) {
-                       printk(KERN_WARNING
-                              "cciss: reading geometry failed, volume "
+                       dev_warn(&h->pdev->dev,
+                              "reading geometry failed, volume "
                               "does not support reading geometry\n");
                        drv->heads = 255;
                        drv->sectors = 32;      /* Sectors per track */
@@ -2780,12 +2806,12 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
                        drv->cylinders = real_size;
                }
        } else {                /* Get geometry failed */
-               printk(KERN_WARNING "cciss: reading geometry failed\n");
+               dev_warn(&h->pdev->dev, "reading geometry failed\n");
        }
 }
 
 static void
-cciss_read_capacity(int ctlr, int logvol, sector_t *total_size,
+cciss_read_capacity(ctlr_info_t *h, int logvol, sector_t *total_size,
                    unsigned int *block_size)
 {
        ReadCapdata_struct *buf;
@@ -2794,25 +2820,25 @@ cciss_read_capacity(int ctlr, int logvol, sector_t *total_size,
 
        buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
        if (!buf) {
-               printk(KERN_WARNING "cciss: out of memory\n");
+               dev_warn(&h->pdev->dev, "out of memory\n");
                return;
        }
 
-       log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-       return_code = sendcmd_withirq(CCISS_READ_CAPACITY, ctlr, buf,
+       log_unit_to_scsi3addr(h, scsi3addr, logvol);
+       return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY, buf,
                sizeof(ReadCapdata_struct), 0, scsi3addr, TYPE_CMD);
        if (return_code == IO_OK) {
                *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
                *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
        } else {                /* read capacity command failed */
-               printk(KERN_WARNING "cciss: read capacity failed\n");
+               dev_warn(&h->pdev->dev, "read capacity failed\n");
                *total_size = 0;
                *block_size = BLOCK_SIZE;
        }
        kfree(buf);
 }
 
-static void cciss_read_capacity_16(int ctlr, int logvol,
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
        sector_t *total_size, unsigned int *block_size)
 {
        ReadCapdata_struct_16 *buf;
@@ -2821,23 +2847,23 @@ static void cciss_read_capacity_16(int ctlr, int logvol,
 
        buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
        if (!buf) {
-               printk(KERN_WARNING "cciss: out of memory\n");
+               dev_warn(&h->pdev->dev, "out of memory\n");
                return;
        }
 
-       log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-       return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
-               ctlr, buf, sizeof(ReadCapdata_struct_16),
+       log_unit_to_scsi3addr(h, scsi3addr, logvol);
+       return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY_16,
+               buf, sizeof(ReadCapdata_struct_16),
                        0, scsi3addr, TYPE_CMD);
        if (return_code == IO_OK) {
                *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
                *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
        } else {                /* read capacity command failed */
-               printk(KERN_WARNING "cciss: read capacity failed\n");
+               dev_warn(&h->pdev->dev, "read capacity failed\n");
                *total_size = 0;
                *block_size = BLOCK_SIZE;
        }
-       printk(KERN_INFO "      blocks= %llu block_size= %d\n",
+       dev_info(&h->pdev->dev, "      blocks= %llu block_size= %d\n",
               (unsigned long long)*total_size+1, *block_size);
        kfree(buf);
 }
@@ -2865,17 +2891,17 @@ static int cciss_revalidate(struct gendisk *disk)
 
        inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
        if (inq_buff == NULL) {
-               printk(KERN_WARNING "cciss: out of memory\n");
+               dev_warn(&h->pdev->dev, "out of memory\n");
                return 1;
        }
        if (h->cciss_read == CCISS_READ_10) {
-               cciss_read_capacity(h->ctlr, logvol,
+               cciss_read_capacity(h, logvol,
                                        &total_size, &block_size);
        } else {
-               cciss_read_capacity_16(h->ctlr, logvol,
+               cciss_read_capacity_16(h, logvol,
                                        &total_size, &block_size);
        }
-       cciss_geometry_inquiry(h->ctlr, logvol, total_size, block_size,
+       cciss_geometry_inquiry(h, logvol, total_size, block_size,
                               inq_buff, drv);
 
        blk_queue_logical_block_size(drv->queue, drv->block_size);
@@ -2909,7 +2935,7 @@ static void start_io(ctlr_info_t *h)
                c = hlist_entry(h->reqQ.first, CommandList_struct, list);
                /* can't do anything if fifo is full */
                if ((h->access.fifo_full(h))) {
-                       printk(KERN_WARNING "cciss: fifo full\n");
+                       dev_warn(&h->pdev->dev, "fifo full\n");
                        break;
                }
 
@@ -2925,7 +2951,7 @@ static void start_io(ctlr_info_t *h)
        }
 }
 
-/* Assumes that CCISS_LOCK(h->ctlr) is held. */
+/* Assumes that h->lock is held. */
 /* Zeros out the error record and then resends the command back */
 /* to the controller */
 static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
@@ -2966,7 +2992,7 @@ static inline int evaluate_target_status(ctlr_info_t *h,
        driver_byte = DRIVER_OK;
        msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
 
-       if (blk_pc_request(cmd->rq))
+       if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC)
                host_byte = DID_PASSTHROUGH;
        else
                host_byte = DID_OK;
@@ -2975,8 +3001,8 @@ static inline int evaluate_target_status(ctlr_info_t *h,
                host_byte, driver_byte);
 
        if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
-               if (!blk_pc_request(cmd->rq))
-                       printk(KERN_WARNING "cciss: cmd %p "
+               if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)
+                       dev_warn(&h->pdev->dev, "cmd %p "
                               "has SCSI Status 0x%x\n",
                               cmd, cmd->err_info->ScsiStatus);
                return error_value;
@@ -2985,17 +3011,19 @@ static inline int evaluate_target_status(ctlr_info_t *h,
        /* check the sense key */
        sense_key = 0xf & cmd->err_info->SenseInfo[2];
        /* no status or recovered error */
-       if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
+       if (((sense_key == 0x0) || (sense_key == 0x1)) &&
+           (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC))
                error_value = 0;
 
        if (check_for_unit_attention(h, cmd)) {
-               *retry_cmd = !blk_pc_request(cmd->rq);
+               *retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC);
                return 0;
        }
 
-       if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
+       /* Not SG_IO or similar? */
+       if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) {
                if (error_value != 0)
-                       printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
+                       dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION"
                               " sense key = 0x%x\n", cmd, sense_key);
                return error_value;
        }
@@ -3035,90 +3063,97 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
                rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
                break;
        case CMD_DATA_UNDERRUN:
-               if (blk_fs_request(cmd->rq)) {
-                       printk(KERN_WARNING "cciss: cmd %p has"
+               if (cmd->rq->cmd_type == REQ_TYPE_FS) {
+                       dev_warn(&h->pdev->dev, "cmd %p has"
                               " completed with data underrun "
                               "reported\n", cmd);
                        cmd->rq->resid_len = cmd->err_info->ResidualCnt;
                }
                break;
        case CMD_DATA_OVERRUN:
-               if (blk_fs_request(cmd->rq))
-                       printk(KERN_WARNING "cciss: cmd %p has"
+               if (cmd->rq->cmd_type == REQ_TYPE_FS)
+                       dev_warn(&h->pdev->dev, "cciss: cmd %p has"
                               " completed with data overrun "
                               "reported\n", cmd);
                break;
        case CMD_INVALID:
-               printk(KERN_WARNING "cciss: cmd %p is "
+               dev_warn(&h->pdev->dev, "cciss: cmd %p is "
                       "reported invalid\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        case CMD_PROTOCOL_ERR:
-               printk(KERN_WARNING "cciss: cmd %p has "
-                      "protocol error \n", cmd);
+               dev_warn(&h->pdev->dev, "cciss: cmd %p has "
+                      "protocol error\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        case CMD_HARDWARE_ERR:
-               printk(KERN_WARNING "cciss: cmd %p had "
+               dev_warn(&h->pdev->dev, "cciss: cmd %p had "
                       " hardware error\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        case CMD_CONNECTION_LOST:
-               printk(KERN_WARNING "cciss: cmd %p had "
+               dev_warn(&h->pdev->dev, "cciss: cmd %p had "
                       "connection lost\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        case CMD_ABORTED:
-               printk(KERN_WARNING "cciss: cmd %p was "
+               dev_warn(&h->pdev->dev, "cciss: cmd %p was "
                       "aborted\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ABORT);
                break;
        case CMD_ABORT_FAILED:
-               printk(KERN_WARNING "cciss: cmd %p reports "
+               dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
                       "abort failed\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        case CMD_UNSOLICITED_ABORT:
-               printk(KERN_WARNING "cciss%d: unsolicited "
+               dev_warn(&h->pdev->dev, "cciss%d: unsolicited "
                       "abort %p\n", h->ctlr, cmd);
                if (cmd->retry_count < MAX_CMD_RETRIES) {
                        retry_cmd = 1;
-                       printk(KERN_WARNING
-                              "cciss%d: retrying %p\n", h->ctlr, cmd);
+                       dev_warn(&h->pdev->dev, "retrying %p\n", cmd);
                        cmd->retry_count++;
                } else
-                       printk(KERN_WARNING
-                              "cciss%d: %p retried too "
-                              "many times\n", h->ctlr, cmd);
+                       dev_warn(&h->pdev->dev,
+                               "%p retried too many times\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ABORT);
                break;
        case CMD_TIMEOUT:
-               printk(KERN_WARNING "cciss: cmd %p timedout\n", cmd);
+               dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
                break;
        default:
-               printk(KERN_WARNING "cciss: cmd %p returned "
+               dev_warn(&h->pdev->dev, "cmd %p returned "
                       "unknown status %x\n", cmd,
                       cmd->err_info->CommandStatus);
                rq->errors = make_status_bytes(SAM_STAT_GOOD,
                        cmd->err_info->CommandStatus, DRIVER_OK,
-                       blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+                       (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                               DID_PASSTHROUGH : DID_ERROR);
        }
 
 after_error_processing:
@@ -3132,6 +3167,34 @@ after_error_processing:
        blk_complete_request(cmd->rq);
 }
 
+/*
+ * Helpers for the 32-bit command tag.  As used below: the low two bits
+ * (CCISS_ERROR_BITS) are masked off before tags are compared, bit 4
+ * (DIRECT_LOOKUP_BIT) flags a tag that carries a command index, and the
+ * index itself is stored from bit DIRECT_LOOKUP_SHIFT upwards.  The macros
+ * are defined ahead of the helpers because several helpers share them.
+ */
+#define CCISS_ERROR_BITS 0x03
+#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 5
+
+/* Nonzero when the tag is flagged as carrying a command index. */
+static inline u32 cciss_tag_contains_index(u32 tag)
+{
+       return tag & DIRECT_LOOKUP_BIT;
+}
+
+/* Extract the command index stored above the flag bits of the tag. */
+static inline u32 cciss_tag_to_index(u32 tag)
+{
+       return tag >> DIRECT_LOOKUP_SHIFT;
+}
+
+/* Return the tag with the two low error bits cleared. */
+static inline u32 cciss_tag_discard_error_bits(u32 tag)
+{
+       return tag & ~CCISS_ERROR_BITS;
+}
+
+/* Flag *tag as an indexed ("direct lookup") tag. */
+static inline void cciss_mark_tag_indexed(u32 *tag)
+{
+       *tag |= DIRECT_LOOKUP_BIT;
+}
+
+/*
+ * OR the command index into *tag.  Existing index bits are not cleared
+ * first, so the result is only the plain index if those bits were zero.
+ */
+static inline void cciss_set_tag_index(u32 *tag, u32 index)
+{
+       *tag |= (index << DIRECT_LOOKUP_SHIFT);
+}
+
 /*
  * Get a request and submit it to the controller.
  */
@@ -3163,7 +3226,8 @@ static void do_cciss_request(struct request_queue *q)
 
        BUG_ON(creq->nr_phys_segments > h->maxsgentries);
 
-       if ((c = cmd_alloc(h, 1)) == NULL)
+       c = cmd_alloc(h);
+       if (!c)
                goto full;
 
        blk_start_request(creq);
@@ -3180,8 +3244,8 @@ static void do_cciss_request(struct request_queue *q)
        /* got command from pool, so use the command block index instead */
        /* for direct lookups. */
        /* The first 2 bits are reserved for controller error reporting. */
-       c->Header.Tag.lower = (c->cmdindex << 3);
-       c->Header.Tag.lower |= 0x04;    /* flag for direct lookup. */
+       cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex);
+       cciss_mark_tag_indexed(&c->Header.Tag.lower);
        memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
        c->Request.CDBLen = 10; /* 12 byte commands not in FW yet; */
        c->Request.Type.Type = TYPE_CMD;        /* It is a command. */
@@ -3192,11 +3256,8 @@ static void do_cciss_request(struct request_queue *q)
        c->Request.CDB[0] =
            (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
        start_blk = blk_rq_pos(creq);
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
+       dev_dbg(&h->pdev->dev, "sector =%d nr_sectors=%d\n",
               (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
-#endif                         /* CCISS_DEBUG */
-
        sg_init_table(tmp_sg, h->maxsgentries);
        seg = blk_rq_map_sg(q, creq, tmp_sg);
 
@@ -3236,17 +3297,18 @@ static void do_cciss_request(struct request_queue *q)
        if (seg > h->maxSG)
                h->maxSG = seg;
 
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "cciss: Submitting %ld sectors in %d segments "
+       dev_dbg(&h->pdev->dev, "Submitting %u sectors in %d segments "
                        "chained[%d]\n",
                        blk_rq_sectors(creq), seg, chained);
-#endif                         /* CCISS_DEBUG */
 
-       c->Header.SGList = c->Header.SGTotal = seg + chained;
-       if (seg > h->max_cmd_sgentries)
+       c->Header.SGTotal = seg + chained;
+       if (seg <= h->max_cmd_sgentries)
+               c->Header.SGList = c->Header.SGTotal;
+       else
                c->Header.SGList = h->max_cmd_sgentries;
+       set_performant_mode(h, c);
 
-       if (likely(blk_fs_request(creq))) {
+       if (likely(creq->cmd_type == REQ_TYPE_FS)) {
                if(h->cciss_read == CCISS_READ_10) {
                        c->Request.CDB[1] = 0;
                        c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
@@ -3276,11 +3338,12 @@ static void do_cciss_request(struct request_queue *q)
                        c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
                        c->Request.CDB[14] = c->Request.CDB[15] = 0;
                }
-       } else if (blk_pc_request(creq)) {
+       } else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) {
                c->Request.CDBLen = creq->cmd_len;
                memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
        } else {
-               printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
+               dev_warn(&h->pdev->dev, "bad request type %d\n",
+                       creq->cmd_type);
                BUG();
        }
 
@@ -3313,72 +3376,131 @@ static inline int interrupt_pending(ctlr_info_t *h)
 
 static inline long interrupt_not_for_us(ctlr_info_t *h)
 {
-       return (((h->access.intr_pending(h) == 0) ||
-                (h->interrupts_enabled == 0)));
+       return ((h->access.intr_pending(h) == 0) ||
+               (h->interrupts_enabled == 0));
 }
 
-static irqreturn_t do_cciss_intr(int irq, void *dev_id)
+static inline int bad_tag(ctlr_info_t *h, u32 tag_index,
+                       u32 raw_tag)
 {
-       ctlr_info_t *h = dev_id;
+       if (unlikely(tag_index >= h->nr_cmds)) {
+               dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Finish one completed command: take it off its queue with removeQ() and
+ * hand it to the completion routine that matches c->cmd_type.
+ *   CMD_RWREQ      -> complete_command(h, c, 0)
+ *   CMD_IOCTL_PEND -> complete(c->waiting)
+ *   CMD_SCSI       -> complete_scsi_command(c, 0, raw_tag), only when
+ *                     CONFIG_CISS_SCSI_TAPE is set
+ * Any other cmd_type is dequeued but otherwise ignored.
+ */
+static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c,
+                               u32 raw_tag)
+{
+       removeQ(c);
+       if (likely(c->cmd_type == CMD_RWREQ))
+               complete_command(h, c, 0);
+       else if (c->cmd_type == CMD_IOCTL_PEND)
+               complete(c->waiting);
+#ifdef CONFIG_CISS_SCSI_TAPE
+       else if (c->cmd_type == CMD_SCSI)
+               complete_scsi_command(c, 0, raw_tag);
+#endif
+}
+
+/*
+ * Fetch the next completed tag, or FIFO_EMPTY when none is available.
+ *
+ * Outside performant mode this just asks the access method
+ * (h->access.command_completed).  In performant mode completions are read
+ * from the reply ring: the entry at reply_pool_head is valid when its low
+ * bit equals reply_pool_wraparound.
+ */
+static inline u32 next_command(ctlr_info_t *h)
+{
+       u32 a;
+
+       if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+               return h->access.command_completed(h);
+
+       /* Low bit of the entry matches the current pass: consume it. */
+       if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+               a = *(h->reply_pool_head); /* Next cmd in ring buffer */
+               (h->reply_pool_head)++;
+               h->commands_outstanding--;
+       } else {
+               a = FIFO_EMPTY;
+       }
+       /*
+        * Check for wraparound: once the head walks off the end of the
+        * ring, restart at the first entry and flip the expected low bit.
+        */
+       if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+               h->reply_pool_head = h->reply_pool;
+               h->reply_pool_wraparound ^= 1;
+       }
+       return a;
+}
+
+/* process completion of an indexed ("direct lookup") command */
+static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+       u32 tag_index;
        CommandList_struct *c;
+
+       tag_index = cciss_tag_to_index(raw_tag);
+       if (bad_tag(h, tag_index, raw_tag))
+               return next_command(h);
+       c = h->cmd_pool + tag_index;
+       finish_cmd(h, c, raw_tag);
+       return next_command(h);
+}
+
+/*
+ * Process completion of a non-indexed command.
+ *
+ * The completed tag is matched against the bus address of every command on
+ * h->cmpQ, ignoring the two error bits on both sides.  A match is finished
+ * via finish_cmd(); if nothing matches, the tag is reported through
+ * bad_tag().  Either way the next completed tag (or FIFO_EMPTY) is
+ * returned so the caller can keep draining completions.
+ */
+static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+       CommandList_struct *c;
+       struct hlist_node *tmp;
+       u32 busaddr_masked;
+       /* Loop invariant: strip the error bits from the tag only once. */
+       u32 tag_masked = cciss_tag_discard_error_bits(raw_tag);
+
+       hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
+               busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
+               if (busaddr_masked == tag_masked) {
+                       finish_cmd(h, c, raw_tag);
+                       return next_command(h);
+               }
+       }
+       /* nr_cmds + 1 is always out of range, so bad_tag() always warns. */
+       bad_tag(h, h->nr_cmds + 1, raw_tag);
+       return next_command(h);
+}
+
+static irqreturn_t do_cciss_intx(int irq, void *dev_id)
+{
+       ctlr_info_t *h = dev_id;
        unsigned long flags;
-       __u32 a, a1, a2;
+       u32 raw_tag;
 
        if (interrupt_not_for_us(h))
                return IRQ_NONE;
-       /*
-        * If there are completed commands in the completion queue,
-        * we had better do something about it.
-        */
-       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
        while (interrupt_pending(h)) {
-               while ((a = get_next_completion(h)) != FIFO_EMPTY) {
-                       a1 = a;
-                       if ((a & 0x04)) {
-                               a2 = (a >> 3);
-                               if (a2 >= h->nr_cmds) {
-                                       printk(KERN_WARNING
-                                              "cciss: controller cciss%d failed, stopping.\n",
-                                              h->ctlr);
-                                       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-                                       fail_all_cmds(h->ctlr);
-                                       return IRQ_HANDLED;
-                               }
-
-                               c = h->cmd_pool + a2;
-                               a = c->busaddr;
-
-                       } else {
-                               struct hlist_node *tmp;
-
-                               a &= ~3;
-                               c = NULL;
-                               hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
-                                       if (c->busaddr == a)
-                                               break;
-                               }
-                       }
-                       /*
-                        * If we've found the command, take it off the
-                        * completion Q and free it
-                        */
-                       if (c && c->busaddr == a) {
-                               removeQ(c);
-                               if (c->cmd_type == CMD_RWREQ) {
-                                       complete_command(h, c, 0);
-                               } else if (c->cmd_type == CMD_IOCTL_PEND) {
-                                       complete(c->waiting);
-                               }
-#                              ifdef CONFIG_CISS_SCSI_TAPE
-                               else if (c->cmd_type == CMD_SCSI)
-                                       complete_scsi_command(c, 0, a1);
-#                              endif
-                               continue;
-                       }
+               raw_tag = get_next_completion(h);
+               while (raw_tag != FIFO_EMPTY) {
+                       if (cciss_tag_contains_index(raw_tag))
+                               raw_tag = process_indexed_cmd(h, raw_tag);
+                       else
+                               raw_tag = process_nonindexed_cmd(h, raw_tag);
                }
        }
+       spin_unlock_irqrestore(&h->lock, flags);
+       return IRQ_HANDLED;
+}
 
-       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never
+ * check the interrupt pending register because it is not set.
+ */
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id)
+{
+       ctlr_info_t *h = dev_id;
+       unsigned long flags;
+       u32 raw_tag;
+
+       spin_lock_irqsave(&h->lock, flags);
+       raw_tag = get_next_completion(h);
+       while (raw_tag != FIFO_EMPTY) {
+               if (cciss_tag_contains_index(raw_tag))
+                       raw_tag = process_indexed_cmd(h, raw_tag);
+               else
+                       raw_tag = process_nonindexed_cmd(h, raw_tag);
+       }
+       spin_unlock_irqrestore(&h->lock, flags);
        return IRQ_HANDLED;
 }
 
@@ -3510,18 +3632,17 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
 
        switch (c->err_info->SenseInfo[12]) {
        case STATE_CHANGED:
-               printk(KERN_WARNING "cciss%d: a state change "
-                       "detected, command retried\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "a state change "
+                       "detected, command retried\n");
                return 1;
        break;
        case LUN_FAILED:
-               printk(KERN_WARNING "cciss%d: LUN failure "
-                       "detected, action required\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "LUN failure "
+                       "detected, action required\n");
                return 1;
        break;
        case REPORT_LUNS_CHANGED:
-               printk(KERN_WARNING "cciss%d: report LUN data "
-                       "changed\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "report LUN data changed\n");
        /*
         * Here, we could call add_to_scan_list and wake up the scan thread,
         * except that it's quite likely that we will get more than one
@@ -3541,19 +3662,18 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
                return 1;
        break;
        case POWER_OR_RESET:
-               printk(KERN_WARNING "cciss%d: a power on "
-                       "or device reset detected\n", h->ctlr);
+               dev_warn(&h->pdev->dev,
+                       "a power on or device reset detected\n");
                return 1;
        break;
        case UNIT_ATTENTION_CLEARED:
-               printk(KERN_WARNING "cciss%d: unit attention "
-                   "cleared by another initiator\n", h->ctlr);
+               dev_warn(&h->pdev->dev,
+                       "unit attention cleared by another initiator\n");
                return 1;
        break;
        default:
-               printk(KERN_WARNING "cciss%d: unknown "
-                       "unit attention detected\n", h->ctlr);
-                               return 1;
+               dev_warn(&h->pdev->dev, "unknown unit attention detected\n");
+               return 1;
        }
 }
 
@@ -3562,39 +3682,41 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
  *   the io functions.
  *   This is for debug only.
  */
-#ifdef CCISS_DEBUG
-static void print_cfg_table(CfgTable_struct *tb)
+static void print_cfg_table(ctlr_info_t *h)
 {
        int i;
        char temp_name[17];
+       CfgTable_struct *tb = h->cfgtable;
 
-       printk("Controller Configuration information\n");
-       printk("------------------------------------\n");
+       dev_dbg(&h->pdev->dev, "Controller Configuration information\n");
+       dev_dbg(&h->pdev->dev, "------------------------------------\n");
        for (i = 0; i < 4; i++)
                temp_name[i] = readb(&(tb->Signature[i]));
        temp_name[4] = '\0';
-       printk("   Signature = %s\n", temp_name);
-       printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
-       printk("   Transport methods supported = 0x%x\n",
+       dev_dbg(&h->pdev->dev, "   Signature = %s\n", temp_name);
+       dev_dbg(&h->pdev->dev, "   Spec Number = %d\n",
+               readl(&(tb->SpecValence)));
+       dev_dbg(&h->pdev->dev, "   Transport methods supported = 0x%x\n",
               readl(&(tb->TransportSupport)));
-       printk("   Transport methods active = 0x%x\n",
+       dev_dbg(&h->pdev->dev, "   Transport methods active = 0x%x\n",
               readl(&(tb->TransportActive)));
-       printk("   Requested transport Method = 0x%x\n",
+       dev_dbg(&h->pdev->dev, "   Requested transport Method = 0x%x\n",
               readl(&(tb->HostWrite.TransportRequest)));
-       printk("   Coalesce Interrupt Delay = 0x%x\n",
+       dev_dbg(&h->pdev->dev, "   Coalesce Interrupt Delay = 0x%x\n",
               readl(&(tb->HostWrite.CoalIntDelay)));
-       printk("   Coalesce Interrupt Count = 0x%x\n",
+       dev_dbg(&h->pdev->dev, "   Coalesce Interrupt Count = 0x%x\n",
               readl(&(tb->HostWrite.CoalIntCount)));
-       printk("   Max outstanding commands = 0x%d\n",
+       dev_dbg(&h->pdev->dev, "   Max outstanding commands = 0x%d\n",
               readl(&(tb->CmdsOutMax)));
-       printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
+       dev_dbg(&h->pdev->dev, "   Bus Types = 0x%x\n",
+               readl(&(tb->BusTypes)));
        for (i = 0; i < 16; i++)
                temp_name[i] = readb(&(tb->ServerName[i]));
        temp_name[16] = '\0';
-       printk("   Server Name = %s\n", temp_name);
-       printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
+       dev_dbg(&h->pdev->dev, "   Server Name = %s\n", temp_name);
+       dev_dbg(&h->pdev->dev, "   Heartbeat Counter = 0x%x\n\n\n",
+               readl(&(tb->HeartBeat)));
 }
-#endif                         /* CCISS_DEBUG */
 
 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 {
@@ -3618,7 +3740,7 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
                                offset += 8;
                                break;
                        default:        /* reserved in PCI 2.2 */
-                               printk(KERN_WARNING
+                               dev_warn(&pdev->dev,
                                       "Base address is invalid\n");
                                return -1;
                                break;
@@ -3630,12 +3752,182 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
        return -1;
 }
 
+/* Fill in bucket_map[], given nsgs (the max number of
+ * scatter gather elements supported) and bucket[],
+ * which is an array of num_buckets integers.  The bucket[]
+ * array contains the different DMA transfer sizes (in 16
+ * byte increments) which the controller uses to fetch
+ * commands.  This function fills in bucket_map[], which
+ * maps a given number of scatter gather elements to one of
+ * those DMA transfer sizes.  The point of it is to allow the
+ * controller to only do as much DMA as needed to fetch the
+ * command, with the DMA transfer size encoded in the lower
+ * bits of the command address.
+ *
+ * bucket[] is searched in order, so it is expected to be sorted
+ * ascending.  An SG count that fits no bucket is mapped to
+ * num_buckets, i.e. one past the last bucket index.
+ */
+static void calc_bucket_map(int bucket[], int num_buckets,
+       int nsgs, int *bucket_map)
+{
+       int i, j, b, size;
+
+       /* even a command with 0 SGs requires 4 blocks */
+#define MINIMUM_TRANSFER_BLOCKS 4
+#define NUM_BUCKETS 8
+       /* Note, bucket_map must have nsgs+1 entries. */
+       for (i = 0; i <= nsgs; i++) {
+               /* Compute size of a command with i SG entries */
+               size = i + MINIMUM_TRANSFER_BLOCKS;
+               b = num_buckets; /* fallback when no bucket is big enough */
+               /* Find the bucket that is just big enough; honour the
+                * caller's bucket count rather than assuming eight. */
+               for (j = 0; j < num_buckets; j++) {
+                       if (bucket[j] >= size) {
+                               b = j;
+                               break;
+                       }
+               }
+               /* for a command with i SG entries, use bucket b. */
+               bucket_map[i] = b;
+       }
+}
+
+/*
+ * Poll the doorbell register until the controller clears CFGTBL_ChangeReq,
+ * sleeping 10 ms between reads and giving up after MAX_CONFIG_WAIT polls.
+ * Nothing is returned, so a timeout is not reported to the caller.
+ */
+static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
+{
+       int i;
+
+       /* Under certain very rare conditions, this can take a while
+        * (e.g. hot-replacing a failed 144GB drive in a RAID 5 set right
+        * as we enter this code). */
+       for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+               if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+                       break;
+               msleep(10);
+       }
+}
+
+/*
+ * Program the controller for the performant transport: reset the reply
+ * ring state, write the block fetch sizes and reply queue parameters to
+ * the transport table, request the mode change through the doorbell, and
+ * warn if the controller does not report performant mode as active
+ * afterwards.  h->reply_pool and h->blockFetchTable must already be
+ * allocated; both are written here.
+ */
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+{
+       /* This is a bit complicated.  There are 8 registers on
+        * the controller which we write to to tell it 8 different
+        * sizes of commands which there may be.  It's a way of
+        * reducing the DMA done to fetch each command.  Encoded into
+        * each command's tag are 3 bits which communicate to the controller
+        * which of the eight sizes that command fits within.  The size of
+        * each command depends on how many scatter gather entries there are.
+        * Each SG entry requires 16 bytes.  The eight registers are programmed
+        * with the number of 16-byte blocks a command of that size requires.
+        * The smallest command possible requires 5 such 16 byte blocks.
+        * the largest command possible requires MAXSGENTRIES + 4 16-byte
+        * blocks.  Note, this only extends to the SG entries contained
+        * within the command block, and does not extend to chained blocks
+        * of SG elements.   bft[] contains the eight values we write to
+        * the registers.  They are not evenly distributed, but have more
+        * sizes for small commands, and fewer sizes for larger commands.
+        */
+       __u32 trans_offset;
+       int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
+                       /*
+                        *  5 = 1 s/g entry or 4k
+                        *  6 = 2 s/g entry or 8k
+                        *  8 = 4 s/g entry or 16k
+                        * 10 = 6 s/g entry or 24k
+                        */
+       unsigned long register_value;
+       /* bft[] must stay ascending: its last entry may not drop below 28. */
+       BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
+
+       h->reply_pool_wraparound = 1; /* spec: init to 1 */
+
+       /* Controller spec: zero out this buffer. */
+       memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
+       h->reply_pool_head = h->reply_pool;
+
+       /* NOTE(review): trans_offset is read but not used again in this
+        * function - confirm whether the read itself is needed. */
+       trans_offset = readl(&(h->cfgtable->TransMethodOffset));
+       calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
+                               h->blockFetchTable);
+       writel(bft[0], &h->transtable->BlockFetch0);
+       writel(bft[1], &h->transtable->BlockFetch1);
+       writel(bft[2], &h->transtable->BlockFetch2);
+       writel(bft[3], &h->transtable->BlockFetch3);
+       writel(bft[4], &h->transtable->BlockFetch4);
+       writel(bft[5], &h->transtable->BlockFetch5);
+       writel(bft[6], &h->transtable->BlockFetch6);
+       writel(bft[7], &h->transtable->BlockFetch7);
+
+       /* size of controller ring buffer */
+       writel(h->max_commands, &h->transtable->RepQSize);
+       writel(1, &h->transtable->RepQCount);
+       writel(0, &h->transtable->RepQCtrAddrLow32);
+       writel(0, &h->transtable->RepQCtrAddrHigh32);
+       /* NOTE(review): the high half of the reply queue address is written
+        * as 0; presumably the pool is always below 4GB - confirm. */
+       writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
+       writel(0, &h->transtable->RepQAddr0High32);
+       writel(CFGTBL_Trans_Performant,
+                       &(h->cfgtable->HostWrite.TransportRequest));
+
+       /* Ring the doorbell, wait for the ack, then verify the new mode. */
+       writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+       cciss_wait_for_mode_change_ack(h);
+       register_value = readl(&(h->cfgtable->TransportActive));
+       if (!(register_value & CFGTBL_Trans_Performant))
+               dev_warn(&h->pdev->dev, "cciss: unable to get board into"
+                                       " performant mode\n");
+}
+
+/*
+ * Switch the controller to the performant transport when the board
+ * advertises it.  Returns quietly, leaving the current access methods in
+ * place, if the board lacks PERFORMANT_MODE, if CommandList_struct is not a
+ * multiple of 32 bytes, or if either supporting buffer cannot be allocated.
+ *
+ * NOTE(review): the access methods are switched even when
+ * cciss_enter_performant_mode() only warns that the mode change failed -
+ * confirm this is intended.
+ */
+static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
+{
+       __u32 trans_support;
+
+       dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
+       /* Attempt to put controller into performant mode if supported */
+       /* Does board support performant mode? */
+       trans_support = readl(&(h->cfgtable->TransportSupport));
+       if (!(trans_support & PERFORMANT_MODE))
+               return;
+
+       dev_dbg(&h->pdev->dev, "Placing controller into performant mode\n");
+       /* Performant mode demands commands on a 32 byte boundary.
+        * pci_alloc_consistent aligns on page boundaries already.
+        * Just need to check if divisible by 32
+        */
+       if ((sizeof(CommandList_struct) % 32) != 0) {
+               /* One format string: no stray spaces, one trailing newline. */
+               dev_warn(&h->pdev->dev,
+                       "cciss info: command size[%d] not divisible by 32, "
+                       "no performant mode..\n",
+                       (int)sizeof(CommandList_struct));
+               return;
+       }
+
+       /* Performant mode ring buffer and supporting data structures */
+       h->reply_pool = (__u64 *)pci_alloc_consistent(
+               h->pdev, h->max_commands * sizeof(__u64),
+               &(h->reply_pool_dhandle));
+
+       /* Need a block fetch table for performant mode */
+       h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
+               sizeof(__u32)), GFP_KERNEL);
+
+       if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
+               goto clean_up;
+
+       cciss_enter_performant_mode(h);
+
+       /* Change the access methods to the performant access methods */
+       h->access = SA5_performant_access;
+       h->transMethod = CFGTBL_Trans_Performant;
+
+       return;
+clean_up:
+       /*
+        * Release whichever buffer was obtained and clear both pointers so
+        * that no later teardown path can free them a second time.
+        */
+       kfree(h->blockFetchTable);
+       h->blockFetchTable = NULL;
+       if (h->reply_pool) {
+               pci_free_consistent(h->pdev,
+                               h->max_commands * sizeof(__u64),
+                               h->reply_pool,
+                               h->reply_pool_dhandle);
+               h->reply_pool = NULL;
+       }
+} /* cciss_put_controller_into_performant_mode */
+
 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
  * controllers that are capable. If not, we use IO-APIC mode.
  */
 
-static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
-                                          struct pci_dev *pdev, __u32 board_id)
+static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
 {
 #ifdef CONFIG_PCI_MSI
        int err;
@@ -3644,268 +3936,283 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
        };
 
        /* Some boards advertise MSI but don't really support it */
-       if ((board_id == 0x40700E11) ||
-           (board_id == 0x40800E11) ||
-           (board_id == 0x40820E11) || (board_id == 0x40830E11))
+       if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
+           (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
                goto default_int_mode;
 
-       if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
-               err = pci_enable_msix(pdev, cciss_msix_entries, 4);
+       if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
+               err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
                if (!err) {
-                       c->intr[0] = cciss_msix_entries[0].vector;
-                       c->intr[1] = cciss_msix_entries[1].vector;
-                       c->intr[2] = cciss_msix_entries[2].vector;
-                       c->intr[3] = cciss_msix_entries[3].vector;
-                       c->msix_vector = 1;
+                       h->intr[0] = cciss_msix_entries[0].vector;
+                       h->intr[1] = cciss_msix_entries[1].vector;
+                       h->intr[2] = cciss_msix_entries[2].vector;
+                       h->intr[3] = cciss_msix_entries[3].vector;
+                       h->msix_vector = 1;
                        return;
                }
                if (err > 0) {
-                       printk(KERN_WARNING "cciss: only %d MSI-X vectors "
-                              "available\n", err);
+                       dev_warn(&h->pdev->dev,
+                               "only %d MSI-X vectors available\n", err);
                        goto default_int_mode;
                } else {
-                       printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
-                              err);
+                       dev_warn(&h->pdev->dev,
+                               "MSI-X init failed %d\n", err);
                        goto default_int_mode;
                }
        }
-       if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
-               if (!pci_enable_msi(pdev)) {
-                       c->msi_vector = 1;
-               } else {
-                       printk(KERN_WARNING "cciss: MSI init failed\n");
-               }
+       if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
+               if (!pci_enable_msi(h->pdev))
+                       h->msi_vector = 1;
+               else
+                       dev_warn(&h->pdev->dev, "MSI init failed\n");
        }
 default_int_mode:
 #endif                         /* CONFIG_PCI_MSI */
        /* if we get here we're going to use the default interrupt mode */
-       c->intr[SIMPLE_MODE_INT] = pdev->irq;
+       h->intr[PERF_MODE_INT] = h->pdev->irq;
        return;
 }
 
-static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
+/* Build the 32-bit board ID from the PCI subsystem IDs and look it up in
+ * products[].
+ *
+ * The ID is stored through board_id with the subsystem device ID in the
+ * upper 16 bits and the subsystem vendor ID in the lower 16 bits; it is
+ * written whether or not the lookup succeeds.
+ *
+ * Returns the index of the matching products[] entry.  Returns -ENODEV
+ * (after a warning) when no entry matches, and -ENODEV (silently) when
+ * cciss_allow_hpsa is set and the HPSA_BOUNDARY entry is reached before
+ * a match is found.
+ */
+static int __devinit cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
 {
-       ushort subsystem_vendor_id, subsystem_device_id, command;
-       __u32 board_id, scratchpad = 0;
-       __u64 cfg_offset;
-       __u32 cfg_base_addr;
-       __u64 cfg_base_addr_index;
-       int i, prod_index, err;
+       int i;
+       u32 subsystem_vendor_id, subsystem_device_id;
 
        subsystem_vendor_id = pdev->subsystem_vendor;
        subsystem_device_id = pdev->subsystem_device;
-       board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
-                   subsystem_vendor_id);
+       *board_id = ((subsystem_device_id << 16) & 0xffff0000) |
+                       subsystem_vendor_id;
 
        for (i = 0; i < ARRAY_SIZE(products); i++) {
                /* Stand aside for hpsa driver on request */
                if (cciss_allow_hpsa && products[i].board_id == HPSA_BOUNDARY)
                        return -ENODEV;
-               if (board_id == products[i].board_id)
-                       break;
-       }
-       prod_index = i;
-       if (prod_index == ARRAY_SIZE(products)) {
-               dev_warn(&pdev->dev,
-                       "unrecognized board ID: 0x%08lx, ignoring.\n",
-                       (unsigned long) board_id);
-               return -ENODEV;
+               if (*board_id == products[i].board_id)
+                       return i;
        }
+       dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n",
+               *board_id);
+       return -ENODEV;
+}
 
-       /* check to see if controller has been disabled */
-       /* BEFORE trying to enable it */
-       (void)pci_read_config_word(pdev, PCI_COMMAND, &command);
-       if (!(command & 0x02)) {
-               printk(KERN_WARNING
-                      "cciss: controller appears to be disabled\n");
-               return -ENODEV;
-       }
+/* Report whether the controller's PCI command register has memory space
+ * access turned off.
+ *
+ * Returns true when PCI_COMMAND_MEMORY is clear in the PCI_COMMAND word
+ * read from h->pdev.  The status returned by the config read is
+ * deliberately discarded.
+ */
+static inline bool cciss_board_disabled(ctlr_info_t *h)
+{
+       u16 command;
 
-       err = pci_enable_device(pdev);
-       if (err) {
-               printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
-               return err;
-       }
+       (void) pci_read_config_word(h->pdev, PCI_COMMAND, &command);
+       return ((command & PCI_COMMAND_MEMORY) == 0);
+}
 
-       err = pci_request_regions(pdev, "cciss");
-       if (err) {
-               printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
-                      "aborting\n");
-               return err;
-       }
+/* Find the first PCI resource of pdev that is flagged IORESOURCE_MEM and
+ * store its start address through memory_bar.
+ *
+ * Returns 0 on success.  If none of the DEVICE_COUNT_RESOURCE resources is
+ * a memory resource, warns and returns -ENODEV without writing *memory_bar.
+ */
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+       unsigned long *memory_bar)
+{
+       int i;
 
-#ifdef CCISS_DEBUG
-       printk("command = %x\n", command);
-       printk("irq = %x\n", pdev->irq);
-       printk("board_id = %x\n", board_id);
-#endif                         /* CCISS_DEBUG */
+       for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
+               if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) {
+                       /* addressing mode bits already removed */
+                       *memory_bar = pci_resource_start(pdev, i);
+                       dev_dbg(&pdev->dev, "memory BAR = %lx\n",
+                               *memory_bar);
+                       return 0;
+               }
+       dev_warn(&pdev->dev, "no memory BAR found\n");
+       return -ENODEV;
+}
 
-/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
- * else we use the IO-APIC interrupt assigned to us by system ROM.
- */
-       cciss_interrupt_mode(c, pdev, board_id);
+/* Poll the scratchpad register until the firmware reports ready.
+ *
+ * Reads h->vaddr + SA5_SCRATCHPAD_OFFSET up to CCISS_BOARD_READY_ITERATIONS
+ * times, sleeping CCISS_BOARD_READY_POLL_INTERVAL_MSECS after each read
+ * that does not return CCISS_FIRMWARE_READY.  The register is read through
+ * h->vaddr, so that mapping must already exist.
+ *
+ * Returns 0 as soon as the ready value is seen, or -ENODEV (after a
+ * warning) when every iteration has been used up.
+ */
+static int __devinit cciss_wait_for_board_ready(ctlr_info_t *h)
+{
+       int i;
+       u32 scratchpad;
 
-       /* find the memory BAR */
-       for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-               if (pci_resource_flags(pdev, i) & IORESOURCE_MEM)
-                       break;
-       }
-       if (i == DEVICE_COUNT_RESOURCE) {
-               printk(KERN_WARNING "cciss: No memory BAR found\n");
-               err = -ENODEV;
-               goto err_out_free_res;
+       for (i = 0; i < CCISS_BOARD_READY_ITERATIONS; i++) {
+               scratchpad = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+               if (scratchpad == CCISS_FIRMWARE_READY)
+                       return 0;
+               msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS);
        }
+       dev_warn(&h->pdev->dev, "board not ready, timed out.\n");
+       return -ENODEV;
+}
 
-       c->paddr = pci_resource_start(pdev, i); /* addressing mode bits
-                                                * already removed
-                                                */
+/* Read from the controller where its config table lives.
+ *
+ * Through the register window at vaddr this reads SA5_CTCFG_OFFSET (kept
+ * to its low 16 bits and stored in *cfg_base_addr) and SA5_CTMEM_OFFSET
+ * (stored in *cfg_offset), then converts the base address into a BAR
+ * index with find_PCI_BAR_index() and stores it in *cfg_base_addr_index.
+ *
+ * Returns 0 on success, or -ENODEV (after a warning) when no BAR index is
+ * found.  All three outputs are written even on failure.
+ */
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+       void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+       u64 *cfg_offset)
+{
+       *cfg_base_addr = readl(vaddr + SA5_CTCFG_OFFSET);
+       *cfg_offset = readl(vaddr + SA5_CTMEM_OFFSET);
+       *cfg_base_addr &= (u32) 0x0000ffff;
+       *cfg_base_addr_index = find_PCI_BAR_index(pdev, *cfg_base_addr);
+       /* The index is a u64, so this compares against -1 converted to u64;
+        * presumably find_PCI_BAR_index() returns -1 on failure -- confirm.
+        */
+       if (*cfg_base_addr_index == -1) {
+               dev_warn(&pdev->dev, "cannot find cfg_base_addr_index, "
+                       "*cfg_base_addr = 0x%08x\n", *cfg_base_addr);
+               return -ENODEV;
+       }
+       return 0;
+}
 
-#ifdef CCISS_DEBUG
-       printk("address 0 = %lx\n", c->paddr);
-#endif                         /* CCISS_DEBUG */
-       c->vaddr = remap_pci_mem(c->paddr, 0x250);
+/* Locate and map the controller's config table and transport table.
+ *
+ * Uses cciss_find_cfg_addrs() to learn which BAR and offset hold the
+ * config table, maps it into h->cfgtable, reads TransMethodOffset from it
+ * and maps the table at that further offset into h->transtable.
+ *
+ * Returns 0 on success, the error from cciss_find_cfg_addrs(), or -ENOMEM
+ * if either mapping fails.  Nothing is unmapped here on failure; a mapping
+ * that succeeded stays in h for the caller to release.
+ */
+static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
+{
+       u64 cfg_offset;
+       u32 cfg_base_addr;
+       u64 cfg_base_addr_index;
+       u32 trans_offset;
+       int rc;
 
-       /* Wait for the board to become ready.  (PCI hotplug needs this.)
-        * We poll for up to 120 secs, once per 100ms. */
-       for (i = 0; i < 1200; i++) {
-               scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
-               if (scratchpad == CCISS_FIRMWARE_READY)
-                       break;
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(msecs_to_jiffies(100));        /* wait 100ms */
-       }
-       if (scratchpad != CCISS_FIRMWARE_READY) {
-               printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
-               err = -ENODEV;
-               goto err_out_free_res;
-       }
+       rc = cciss_find_cfg_addrs(h->pdev, h->vaddr, &cfg_base_addr,
+               &cfg_base_addr_index, &cfg_offset);
+       if (rc)
+               return rc;
+       /* Map the size of the table itself, not the size of the pointer. */
+       h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
+               cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
+       if (!h->cfgtable)
+               return -ENOMEM;
+       /* Find performant mode table. */
+       trans_offset = readl(&h->cfgtable->TransMethodOffset);
+       h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
+                               cfg_base_addr_index)+cfg_offset+trans_offset,
+                               sizeof(*h->transtable));
+       if (!h->transtable)
+               return -ENOMEM;
+       return 0;
+}
 
-       /* get the address index number */
-       cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
-       cfg_base_addr &= (__u32) 0x0000ffff;
-#ifdef CCISS_DEBUG
-       printk("cfg base address = %x\n", cfg_base_addr);
-#endif                         /* CCISS_DEBUG */
-       cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
-#ifdef CCISS_DEBUG
-       printk("cfg base address index = %llx\n",
-               (unsigned long long)cfg_base_addr_index);
-#endif                         /* CCISS_DEBUG */
-       if (cfg_base_addr_index == -1) {
-               printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
-               err = -ENODEV;
-               goto err_out_free_res;
+/* Set h->max_commands from the config table's MaxPerformantModeCommands
+ * field.  A reported value below 16 is not trusted: a warning is logged
+ * and 16 is used instead.
+ */
+static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
+{
+       h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
+       if (h->max_commands < 16) {
+               dev_warn(&h->pdev->dev, "Controller reports "
+                       "max supported commands of %d, an obvious lie. "
+                       "Using 16.  Ensure that firmware is up to date.\n",
+                       h->max_commands);
+               h->max_commands = 16;
        }
+}
 
-       cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
-#ifdef CCISS_DEBUG
-       printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
-#endif                         /* CCISS_DEBUG */
-       c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
-                                                      cfg_base_addr_index) +
-                                   cfg_offset, sizeof(CfgTable_struct));
-       c->board_id = board_id;
-
-#ifdef CCISS_DEBUG
-       print_cfg_table(c->cfgtable);
-#endif                         /* CCISS_DEBUG */
-
-       /* Some controllers support Zero Memory Raid (ZMR).
-        * When configured in ZMR mode the number of supported
-        * commands drops to 64. So instead of just setting an
-        * arbitrary value we make the driver a little smarter.
-        * We read the config table to tell us how many commands
-        * are supported on the controller then subtract 4 to
-        * leave a little room for ioctl calls.
-        */
-       c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
-       c->maxsgentries = readl(&(c->cfgtable->MaxSGElements));
-
+/* Interrogate the hardware for some limits:
+ * max commands, max SG elements without chaining, and with chaining,
+ * SG chain block size, etc.
+ *
+ * Fills in h->max_commands, h->nr_cmds, h->maxsgentries,
+ * h->max_cmd_sgentries and h->chainsize.  Chaining (a non-zero chainsize)
+ * is only set up when the controller reports more than 512 SG elements;
+ * otherwise maxsgentries is forced to 31 and chainsize to 0.
+ */
+static void __devinit cciss_find_board_params(ctlr_info_t *h)
+{
+       cciss_get_max_perf_mode_cmds(h);
+       h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+       h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
        /*
-        * Limit native command to 32 s/g elements to save dma'able memory.
+        * Limit in-command s/g elements to 32 to save dma'able memory.
         * Howvever spec says if 0, use 31
         */
-
-       c->max_cmd_sgentries = 31;
-       if (c->maxsgentries > 512) {
-               c->max_cmd_sgentries = 32;
-               c->chainsize = c->maxsgentries - c->max_cmd_sgentries + 1;
-               c->maxsgentries -= 1;   /* account for chain pointer */
+       h->max_cmd_sgentries = 31;
+       if (h->maxsgentries > 512) {
+               h->max_cmd_sgentries = 32;
+               h->chainsize = h->maxsgentries - h->max_cmd_sgentries + 1;
+               h->maxsgentries--; /* save one for chain pointer */
        } else {
-               c->maxsgentries = 31;   /* Default to traditional value */
-               c->chainsize = 0;       /* traditional */
+               h->maxsgentries = 31; /* default to traditional values */
+               h->chainsize = 0;
        }
+}
 
-       c->product_name = products[prod_index].product_name;
-       c->access = *(products[prod_index].access);
-       c->nr_cmds = c->max_commands - 4;
-       if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
-           (readb(&c->cfgtable->Signature[1]) != 'I') ||
-           (readb(&c->cfgtable->Signature[2]) != 'S') ||
-           (readb(&c->cfgtable->Signature[3]) != 'S')) {
-               printk("Does not appear to be a valid CISS config table\n");
-               err = -ENODEV;
-               goto err_out_free_res;
+/* Check that the mapped config table starts with the bytes 'C','I','S','S'.
+ *
+ * Returns true when all four Signature bytes match; otherwise logs a
+ * warning and returns false.  Reads through h->cfgtable, so the table must
+ * already be mapped.
+ */
+static inline bool CISS_signature_present(ctlr_info_t *h)
+{
+       if ((readb(&h->cfgtable->Signature[0]) != 'C') ||
+           (readb(&h->cfgtable->Signature[1]) != 'I') ||
+           (readb(&h->cfgtable->Signature[2]) != 'S') ||
+           (readb(&h->cfgtable->Signature[3]) != 'S')) {
+               dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
+               return false;
        }
+       return true;
+}
+
+/* Need to enable prefetch in the SCSI core for 6400 in x86
+ *
+ * When CONFIG_X86 is defined this sets bit 0x100 in the config table's
+ * SCSI_Prefetch field with a read-modify-write; on other configurations
+ * the function body is empty.  It is not conditional on the board ID.
+ */
+static inline void cciss_enable_scsi_prefetch(ctlr_info_t *h)
+{
 #ifdef CONFIG_X86
-       {
-               /* Need to enable prefetch in the SCSI core for 6400 in x86 */
-               __u32 prefetch;
-               prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
-               prefetch |= 0x100;
-               writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
-       }
+       u32 prefetch;
+
+       prefetch = readl(&(h->cfgtable->SCSI_Prefetch));
+       prefetch |= 0x100;
+       writel(prefetch, &(h->cfgtable->SCSI_Prefetch));
 #endif
+}
 
-       /* Disabling DMA prefetch and refetch for the P600.
-        * An ASIC bug may result in accesses to invalid memory addresses.
-        * We've disabled prefetch for some time now. Testing with XEN
-        * kernels revealed a bug in the refetch if dom0 resides on a P600.
-        */
-       if(board_id == 0x3225103C) {
-               __u32 dma_prefetch;
-               __u32 dma_refetch;
-               dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
-               dma_prefetch |= 0x8000;
-               writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
-               pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
-               dma_refetch |= 0x1;
-               pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
+/* Disable DMA prefetch for the P600.  Otherwise an ASIC bug may result
+ * in a prefetch beyond physical memory.
+ *
+ * Does nothing unless h->board_id is 0x3225103C.  For that board it sets
+ * bit 0x8000 in the register at h->vaddr + I2O_DMA1_CFG and bit 0x1 in
+ * the PCI config dword at offset PCI_COMMAND_PARITY.  The second write
+ * presumably disables DMA refetch as well, judging by the variable name
+ * -- confirm against the controller documentation.
+ */
+static inline void cciss_p600_dma_prefetch_quirk(ctlr_info_t *h)
+{
+       u32 dma_prefetch;
+       __u32 dma_refetch;
+
+       if (h->board_id != 0x3225103C)
+               return;
+       dma_prefetch = readl(h->vaddr + I2O_DMA1_CFG);
+       dma_prefetch |= 0x8000;
+       writel(dma_prefetch, h->vaddr + I2O_DMA1_CFG);
+       pci_read_config_dword(h->pdev, PCI_COMMAND_PARITY, &dma_refetch);
+       dma_refetch |= 0x1;
+       pci_write_config_dword(h->pdev, PCI_COMMAND_PARITY, dma_refetch);
+}
+
+static int __devinit cciss_pci_init(ctlr_info_t *h)
+{
+       int prod_index, err;
+
+       prod_index = cciss_lookup_board_id(h->pdev, &h->board_id);
+       if (prod_index < 0)
+               return -ENODEV;
+       h->product_name = products[prod_index].product_name;
+       h->access = *(products[prod_index].access);
+
+       if (cciss_board_disabled(h)) {
+               dev_warn(&h->pdev->dev, "controller appears to be disabled\n");
+               return -ENODEV;
+       }
+       err = pci_enable_device(h->pdev);
+       if (err) {
+               dev_warn(&h->pdev->dev, "Unable to Enable PCI device\n");
+               return err;
        }
 
-#ifdef CCISS_DEBUG
-       printk("Trying to put board into Simple mode\n");
-#endif                         /* CCISS_DEBUG */
-       c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
-       /* Update the field, and then ring the doorbell */
-       writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
-       writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
+       err = pci_request_regions(h->pdev, "cciss");
+       if (err) {
+               dev_warn(&h->pdev->dev,
+                       "Cannot obtain PCI resources, aborting\n");
+               return err;
+       }
 
-       /* under certain very rare conditions, this can take awhile.
-        * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
-        * as we enter this code.) */
-       for (i = 0; i < MAX_CONFIG_WAIT; i++) {
-               if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
-                       break;
-               /* delay and try again */
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(msecs_to_jiffies(1));
+       dev_dbg(&h->pdev->dev, "irq = %x\n", h->pdev->irq);
+       dev_dbg(&h->pdev->dev, "board_id = %x\n", h->board_id);
+
+/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
+ * else we use the IO-APIC interrupt assigned to us by system ROM.
+ */
+       cciss_interrupt_mode(h);
+       err = cciss_pci_find_memory_BAR(h->pdev, &h->paddr);
+       if (err)
+               goto err_out_free_res;
+       h->vaddr = remap_pci_mem(h->paddr, 0x250);
+       if (!h->vaddr) {
+               err = -ENOMEM;
+               goto err_out_free_res;
        }
+       err = cciss_wait_for_board_ready(h);
+       if (err)
+               goto err_out_free_res;
+       err = cciss_find_cfgtables(h);
+       if (err)
+               goto err_out_free_res;
+       print_cfg_table(h);
+       cciss_find_board_params(h);
 
-#ifdef CCISS_DEBUG
-       printk(KERN_DEBUG "I counter got to %d %x\n", i,
-              readl(c->vaddr + SA5_DOORBELL));
-#endif                         /* CCISS_DEBUG */
-#ifdef CCISS_DEBUG
-       print_cfg_table(c->cfgtable);
-#endif                         /* CCISS_DEBUG */
-
-       if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
-               printk(KERN_WARNING "cciss: unable to get board into"
-                      " simple mode\n");
+       if (!CISS_signature_present(h)) {
                err = -ENODEV;
                goto err_out_free_res;
        }
+       cciss_enable_scsi_prefetch(h);
+       cciss_p600_dma_prefetch_quirk(h);
+       cciss_put_controller_into_performant_mode(h);
        return 0;
 
 err_out_free_res:
@@ -3913,42 +4220,47 @@ err_out_free_res:
         * Deliberately omit pci_disable_device(): it does something nasty to
         * Smart Array controllers that pci_enable_device does not undo
         */
-       pci_release_regions(pdev);
+       if (h->transtable)
+               iounmap(h->transtable);
+       if (h->cfgtable)
+               iounmap(h->cfgtable);
+       if (h->vaddr)
+               iounmap(h->vaddr);
+       pci_release_regions(h->pdev);
        return err;
 }
 
 /* Function to find the first free pointer into our hba[] array
+ * and fill it with a newly allocated, zeroed ctlr_info_t.
+ * On success the index of that hba[] slot is returned.
+ * pdev is used only as the device for the warning messages.
  * Returns -1 if no free entries are left.
+ * Also returns -1 if the allocation fails.
  */
-static int alloc_cciss_hba(void)
+static int alloc_cciss_hba(struct pci_dev *pdev)
 {
        int i;
 
        for (i = 0; i < MAX_CTLR; i++) {
                if (!hba[i]) {
-                       ctlr_info_t *p;
+                       ctlr_info_t *h;
 
-                       p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
-                       if (!p)
+                       h = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
+                       if (!h)
                                goto Enomem;
-                       hba[i] = p;
+                       hba[i] = h;
                        return i;
                }
        }
-       printk(KERN_WARNING "cciss: This driver supports a maximum"
+       dev_warn(&pdev->dev, "This driver supports a maximum"
               " of %d controllers.\n", MAX_CTLR);
        return -1;
 Enomem:
-       printk(KERN_ERR "cciss: out of memory.\n");
+       dev_warn(&pdev->dev, "out of memory.\n");
        return -1;
 }
 
-static void free_hba(int n)
+static void free_hba(ctlr_info_t *h)
 {
-       ctlr_info_t *h = hba[n];
        int i;
 
-       hba[n] = NULL;
+       hba[h->ctlr] = NULL;
        for (i = 0; i < h->highest_lun + 1; i++)
                if (h->gendisk[i] != NULL)
                        put_disk(h->gendisk[i]);
@@ -4028,7 +4340,8 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
        /* we leak the DMA buffer here ... no choice since the controller could
           still complete the command. */
        if (i == 10) {
-               printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+               dev_err(&pdev->dev,
+                       "controller message %02x:%02x timed out\n",
                        opcode, type);
                return -ETIMEDOUT;
        }
@@ -4036,12 +4349,12 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
        pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
 
        if (tag & 2) {
-               printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+               dev_err(&pdev->dev, "controller message %02x:%02x failed\n",
                        opcode, type);
                return -EIO;
        }
 
-       printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+       dev_info(&pdev->dev, "controller message %02x:%02x succeeded\n",
                opcode, type);
        return 0;
 }
@@ -4062,7 +4375,7 @@ static __devinit int cciss_reset_msi(struct pci_dev *pdev)
        if (pos) {
                pci_read_config_word(pdev, msi_control_reg(pos), &control);
                if (control & PCI_MSI_FLAGS_ENABLE) {
-                       printk(KERN_INFO "cciss: resetting MSI\n");
+                       dev_info(&pdev->dev, "resetting MSI\n");
                        pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
                }
        }
@@ -4071,7 +4384,7 @@ static __devinit int cciss_reset_msi(struct pci_dev *pdev)
        if (pos) {
                pci_read_config_word(pdev, msi_control_reg(pos), &control);
                if (control & PCI_MSIX_FLAGS_ENABLE) {
-                       printk(KERN_INFO "cciss: resetting MSI-X\n");
+                       dev_info(&pdev->dev, "resetting MSI-X\n");
                        pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
                }
        }
@@ -4079,68 +4392,144 @@ static __devinit int cciss_reset_msi(struct pci_dev *pdev)
        return 0;
 }
 
-/* This does a hard reset of the controller using PCI power management
- * states. */
-static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+/* Hard-reset the controller by one of two methods.
+ *
+ * use_doorbell true:  write DOORBELL_CTLR_RESET to the doorbell register
+ *                     at vaddr + SA5_DOORBELL, then sleep 1000 ms.
+ * use_doorbell false: move the device to D3hot and back to D0 through its
+ *                     PCI power management control register, sleeping
+ *                     500 ms after each transition.
+ *
+ * vaddr is the mapped controller register window; it is only used on the
+ * doorbell path.
+ *
+ * Returns 0, or -ENODEV when the PCI PM path is requested but the device
+ * has no PCI_CAP_ID_PM capability.  PCI config space is neither saved nor
+ * restored here.
+ */
+static int cciss_controller_hard_reset(struct pci_dev *pdev,
+       void __iomem *vaddr, bool use_doorbell)
 {
-       u16 pmcsr, saved_config_space[32];
-       int i, pos;
+       u16 pmcsr;
+       int pos;
 
-       printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+       if (use_doorbell) {
+               /* For everything after the P600, the PCI power state method
+                * of resetting the controller doesn't work, so we have this
+                * other way using the doorbell register.
+                */
+               dev_info(&pdev->dev, "using doorbell to reset controller\n");
+               writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
+               msleep(1000);
+       } else { /* Try to do it the PCI power state way */
+
+               /* Quoting from the Open CISS Specification: "The Power
+                * Management Control/Status Register (CSR) controls the power
+                * state of the device.  The normal operating state is D0,
+                * CSR=00h.  The software off state is D3, CSR=03h.  To reset
+                * the controller, place the interface device in D3 then to D0,
+                * this causes a secondary PCI reset which will reset the
+                * controller." */
+
+               pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+               if (pos == 0) {
+                       dev_err(&pdev->dev,
+                               "cciss_controller_hard_reset: "
+                               "PCI PM not supported\n");
+                       return -ENODEV;
+               }
+               dev_info(&pdev->dev, "using PCI PM to reset controller\n");
+               /* enter the D3hot power management state */
+               pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+               pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+               pmcsr |= PCI_D3hot;
+               pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
 
-       /* This is very nearly the same thing as
+               msleep(500);
 
-          pci_save_state(pci_dev);
-          pci_set_power_state(pci_dev, PCI_D3hot);
-          pci_set_power_state(pci_dev, PCI_D0);
-          pci_restore_state(pci_dev);
+               /* enter the D0 power management state */
+               pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+               pmcsr |= PCI_D0;
+               pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
 
-          but we can't use these nice canned kernel routines on
-          kexec, because they also check the MSI/MSI-X state in PCI
-          configuration space and do the wrong thing when it is
-          set/cleared.  Also, the pci_save/restore_state functions
-          violate the ordering requirements for restoring the
-          configuration space from the CCISS document (see the
-          comment below).  So we roll our own .... */
+               msleep(500);
+       }
+       return 0;
+}
 
-       for (i = 0; i < 32; i++)
-               pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
+/* This does a hard reset of the controller using PCI power management
+ * states or using the doorbell register. */
+static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
+{
+       u16 saved_config_space[32];
+       u64 cfg_offset;
+       u32 cfg_base_addr;
+       u64 cfg_base_addr_index;
+       void __iomem *vaddr;
+       unsigned long paddr;
+       u32 misc_fw_support, active_transport;
+       int rc, i;
+       CfgTable_struct __iomem *cfgtable;
+       bool use_doorbell;
+       u32 board_id;
+
+       /* For controllers as old as the P600, this is very nearly
+        * the same thing as
+        *
+        * pci_save_state(pci_dev);
+        * pci_set_power_state(pci_dev, PCI_D3hot);
+        * pci_set_power_state(pci_dev, PCI_D0);
+        * pci_restore_state(pci_dev);
+        *
+        * but we can't use these nice canned kernel routines on
+        * kexec, because they also check the MSI/MSI-X state in PCI
+        * configuration space and do the wrong thing when it is
+        * set/cleared.  Also, the pci_save/restore_state functions
+        * violate the ordering requirements for restoring the
+        * configuration space from the CCISS document (see the
+        * comment below).  So we roll our own ....
+        *
+        * For controllers newer than the P600, the pci power state
+        * method of resetting doesn't work so we have another way
+        * using the doorbell register.
+        */
 
-       pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
-       if (pos == 0) {
-               printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
+       /* Exclude 640x boards.  These are two pci devices in one slot
+        * which share a battery backed cache module.  One controls the
+        * cache, the other accesses the cache through the one that controls
+        * it.  If we reset the one controlling the cache, the other will
+        * likely not be happy.  Just forbid resetting this conjoined mess.
+        */
+       cciss_lookup_board_id(pdev, &board_id);
+       if (board_id == 0x409C0E11 || board_id == 0x409D0E11) {
+               dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
+                               "due to shared cache module.");
                return -ENODEV;
        }
 
-       /* Quoting from the Open CISS Specification: "The Power
-        * Management Control/Status Register (CSR) controls the power
-        * state of the device.  The normal operating state is D0,
-        * CSR=00h.  The software off state is D3, CSR=03h.  To reset
-        * the controller, place the interface device in D3 then to
-        * D0, this causes a secondary PCI reset which will reset the
-        * controller." */
+       for (i = 0; i < 32; i++)
+               pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
 
-       /* enter the D3hot power management state */
-       pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
-       pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-       pmcsr |= PCI_D3hot;
-       pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+       /* find the first memory BAR, so we can find the cfg table */
+       rc = cciss_pci_find_memory_BAR(pdev, &paddr);
+       if (rc)
+               return rc;
+       vaddr = remap_pci_mem(paddr, 0x250);
+       if (!vaddr)
+               return -ENOMEM;
 
-       schedule_timeout_uninterruptible(HZ >> 1);
+       /* find cfgtable in order to check if reset via doorbell is supported */
+       rc = cciss_find_cfg_addrs(pdev, vaddr, &cfg_base_addr,
+                                       &cfg_base_addr_index, &cfg_offset);
+       if (rc)
+               goto unmap_vaddr;
+       cfgtable = remap_pci_mem(pci_resource_start(pdev,
+                      cfg_base_addr_index) + cfg_offset, sizeof(*cfgtable));
+       if (!cfgtable) {
+               rc = -ENOMEM;
+               goto unmap_vaddr;
+       }
 
-       /* enter the D0 power management state */
-       pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-       pmcsr |= PCI_D0;
-       pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+       /* If reset via doorbell register is supported, use that. */
+       misc_fw_support = readl(&cfgtable->misc_fw_support);
+       use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
 
-       schedule_timeout_uninterruptible(HZ >> 1);
+       rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
+       if (rc)
+               goto unmap_cfgtable;
 
        /* Restore the PCI configuration space.  The Open CISS
         * Specification says, "Restore the PCI Configuration
         * Registers, offsets 00h through 60h. It is important to
         * restore the command register, 16-bits at offset 04h,
         * last. Do not restore the configuration status register,
-        * 16-bits at offset 06h."  Note that the offset is 2*i. */
+        * 16-bits at offset 06h."  Note that the offset is 2*i.
+        */
        for (i = 0; i < 32; i++) {
                if (i == 2 || i == 3)
                        continue;
@@ -4149,6 +4538,63 @@ static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
        wmb();
        pci_write_config_word(pdev, 4, saved_config_space[2]);
 
+       /* Some devices (notably the HP Smart Array 5i Controller)
+          need a little pause here */
+       msleep(CCISS_POST_RESET_PAUSE_MSECS);
+
+       /* Controller should be in simple mode at this point.  If it's not,
+        * It means we're on one of those controllers which doesn't support
+        * the doorbell reset method and on which the PCI power management reset
+        * method doesn't work (P800, for example.)
+        * In those cases, don't try to proceed, as it generally doesn't work.
+        */
+       active_transport = readl(&cfgtable->TransportActive);
+       if (active_transport & PERFORMANT_MODE) {
+               dev_warn(&pdev->dev, "Unable to successfully reset controller,"
+                       " Ignoring controller.\n");
+               rc = -ENODEV;
+       }
+
+unmap_cfgtable:
+       iounmap(cfgtable);
+
+unmap_vaddr:
+       iounmap(vaddr);
+       return rc;
+}
+
+static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
+{
+       int rc, i;
+
+       if (!reset_devices)
+               return 0; /* reset_devices not set: nothing to do */
+
+       /* Reset the controller with a PCI power-cycle or via doorbell */
+       rc = cciss_kdump_hard_reset_controller(pdev);
+
+       /* -ENOTSUPP here means we cannot reset the controller
+        * but it's already (and still) up and running in
+        * "performant mode".  Or, it might be 640x, which can't reset
+        * due to concerns about shared bbwc between 6402/6404 pair.
+        */
+       if (rc == -ENOTSUPP)
+               return 0; /* just try to do the kdump anyhow. */
+       if (rc)
+               return -ENODEV; /* any other reset error is reported as -ENODEV */
+       if (cciss_reset_msi(pdev))
+               return -ENODEV;
+
+       /* Poll with no-ops.  NOTE: still returns 0 if every retry fails */
+       for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
+               if (cciss_noop(pdev) == 0)
+                       break;
+               else
+                       dev_warn(&pdev->dev, "no-op failed%s\n",
+                               (i < CCISS_POST_RESET_NOOP_RETRIES - 1 ?
+                                       "; re-trying" : ""));
+               msleep(CCISS_POST_RESET_NOOP_INTERVAL_MSECS);
+       }
        return 0;
 }
 
@@ -4166,46 +4612,31 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        int rc;
        int dac, return_code;
        InquiryData_struct *inq_buff;
+       ctlr_info_t *h;
 
-       if (reset_devices) {
-               /* Reset the controller with a PCI power-cycle */
-               if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
-                       return -ENODEV;
-
-               /* Now try to get the controller to respond to a no-op. Some
-                  devices (notably the HP Smart Array 5i Controller) need
-                  up to 30 seconds to respond. */
-               for (i=0; i<30; i++) {
-                       if (cciss_noop(pdev) == 0)
-                               break;
-
-                       schedule_timeout_uninterruptible(HZ);
-               }
-               if (i == 30) {
-                       printk(KERN_ERR "cciss: controller seems dead\n");
-                       return -EBUSY;
-               }
-       }
-
-       i = alloc_cciss_hba();
+       rc = cciss_init_reset_devices(pdev);
+       if (rc)
+               return rc;
+       i = alloc_cciss_hba(pdev);
        if (i < 0)
                return -1;
 
-       hba[i]->busy_initializing = 1;
-       INIT_HLIST_HEAD(&hba[i]->cmpQ);
-       INIT_HLIST_HEAD(&hba[i]->reqQ);
-       mutex_init(&hba[i]->busy_shutting_down);
+       h = hba[i];
+       h->pdev = pdev;
+       h->busy_initializing = 1;
+       INIT_HLIST_HEAD(&h->cmpQ);
+       INIT_HLIST_HEAD(&h->reqQ);
+       mutex_init(&h->busy_shutting_down);
 
-       if (cciss_pci_init(hba[i], pdev) != 0)
+       if (cciss_pci_init(h) != 0)
                goto clean_no_release_regions;
 
-       sprintf(hba[i]->devname, "cciss%d", i);
-       hba[i]->ctlr = i;
-       hba[i]->pdev = pdev;
+       sprintf(h->devname, "cciss%d", i);
+       h->ctlr = i;
 
-       init_completion(&hba[i]->scan_wait);
+       init_completion(&h->scan_wait);
 
-       if (cciss_create_hba_sysfs_entry(hba[i]))
+       if (cciss_create_hba_sysfs_entry(h))
                goto clean0;
 
        /* configure PCI DMA stuff */
@@ -4214,7 +4645,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
                dac = 0;
        else {
-               printk(KERN_ERR "cciss: no suitable DMA available\n");
+               dev_err(&h->pdev->dev, "no suitable DMA available\n");
                goto clean1;
        }
 
@@ -4224,151 +4655,161 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
         * 8 controller support.
         */
        if (i < MAX_CTLR_ORIG)
-               hba[i]->major = COMPAQ_CISS_MAJOR + i;
-       rc = register_blkdev(hba[i]->major, hba[i]->devname);
+               h->major = COMPAQ_CISS_MAJOR + i;
+       rc = register_blkdev(h->major, h->devname);
        if (rc == -EBUSY || rc == -EINVAL) {
-               printk(KERN_ERR
-                      "cciss:  Unable to get major number %d for %s "
-                      "on hba %d\n", hba[i]->major, hba[i]->devname, i);
+               dev_err(&h->pdev->dev,
+                      "Unable to get major number %d for %s "
+                      "on hba %d\n", h->major, h->devname, i);
                goto clean1;
        } else {
                if (i >= MAX_CTLR_ORIG)
-                       hba[i]->major = rc;
+                       h->major = rc;
        }
 
        /* make sure the board interrupts are off */
-       hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
-       if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
-                       IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
-               printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
-                      hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
-               goto clean2;
+       h->access.set_intr_mask(h, CCISS_INTR_OFF);
+       if (h->msi_vector || h->msix_vector) {
+               if (request_irq(h->intr[PERF_MODE_INT],
+                               do_cciss_msix_intr,
+                               IRQF_DISABLED, h->devname, h)) {
+                       dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+                              h->intr[PERF_MODE_INT], h->devname);
+                       goto clean2;
+               }
+       } else {
+               if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx,
+                               IRQF_DISABLED, h->devname, h)) {
+                       dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+                              h->intr[PERF_MODE_INT], h->devname);
+                       goto clean2;
+               }
        }
 
-       printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
-              hba[i]->devname, pdev->device, pci_name(pdev),
-              hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
+       dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
+              h->devname, pdev->device, pci_name(pdev),
+              h->intr[PERF_MODE_INT], dac ? "" : " not");
 
-       hba[i]->cmd_pool_bits =
-           kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+       h->cmd_pool_bits =
+           kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
                        * sizeof(unsigned long), GFP_KERNEL);
-       hba[i]->cmd_pool = (CommandList_struct *)
-           pci_alloc_consistent(hba[i]->pdev,
-                   hba[i]->nr_cmds * sizeof(CommandList_struct),
-                   &(hba[i]->cmd_pool_dhandle));
-       hba[i]->errinfo_pool = (ErrorInfo_struct *)
-           pci_alloc_consistent(hba[i]->pdev,
-                   hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-                   &(hba[i]->errinfo_pool_dhandle));
-       if ((hba[i]->cmd_pool_bits == NULL)
-           || (hba[i]->cmd_pool == NULL)
-           || (hba[i]->errinfo_pool == NULL)) {
-               printk(KERN_ERR "cciss: out of memory");
+       h->cmd_pool = (CommandList_struct *)
+           pci_alloc_consistent(h->pdev,
+                   h->nr_cmds * sizeof(CommandList_struct),
+                   &(h->cmd_pool_dhandle));
+       h->errinfo_pool = (ErrorInfo_struct *)
+           pci_alloc_consistent(h->pdev,
+                   h->nr_cmds * sizeof(ErrorInfo_struct),
+                   &(h->errinfo_pool_dhandle));
+       if ((h->cmd_pool_bits == NULL)
+           || (h->cmd_pool == NULL)
+           || (h->errinfo_pool == NULL)) {
+               dev_err(&h->pdev->dev, "out of memory");
                goto clean4;
        }
 
        /* Need space for temp scatter list */
-       hba[i]->scatter_list = kmalloc(hba[i]->max_commands *
+       h->scatter_list = kmalloc(h->max_commands *
                                                sizeof(struct scatterlist *),
                                                GFP_KERNEL);
-       for (k = 0; k < hba[i]->nr_cmds; k++) {
-               hba[i]->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
-                                                       hba[i]->maxsgentries,
+       for (k = 0; k < h->nr_cmds; k++) {
+               h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
+                                                       h->maxsgentries,
                                                        GFP_KERNEL);
-               if (hba[i]->scatter_list[k] == NULL) {
-                       printk(KERN_ERR "cciss%d: could not allocate "
-                               "s/g lists\n", i);
+               if (h->scatter_list[k] == NULL) {
+                       dev_err(&h->pdev->dev,
+                               "could not allocate s/g lists\n");
                        goto clean4;
                }
        }
-       hba[i]->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[i],
-               hba[i]->chainsize, hba[i]->nr_cmds);
-       if (!hba[i]->cmd_sg_list && hba[i]->chainsize > 0)
+       h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+               h->chainsize, h->nr_cmds);
+       if (!h->cmd_sg_list && h->chainsize > 0)
                goto clean4;
 
-       spin_lock_init(&hba[i]->lock);
+       spin_lock_init(&h->lock);
 
        /* Initialize the pdev driver private data.
-          have it point to hba[i].  */
-       pci_set_drvdata(pdev, hba[i]);
+          have it point to h.  */
+       pci_set_drvdata(pdev, h);
        /* command and error info recs zeroed out before
           they are used */
-       memset(hba[i]->cmd_pool_bits, 0,
-              DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+       memset(h->cmd_pool_bits, 0,
+              DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
                        * sizeof(unsigned long));
 
-       hba[i]->num_luns = 0;
-       hba[i]->highest_lun = -1;
+       h->num_luns = 0;
+       h->highest_lun = -1;
        for (j = 0; j < CISS_MAX_LUN; j++) {
-               hba[i]->drv[j] = NULL;
-               hba[i]->gendisk[j] = NULL;
+               h->drv[j] = NULL;
+               h->gendisk[j] = NULL;
        }
 
-       cciss_scsi_setup(i);
+       cciss_scsi_setup(h);
 
        /* Turn the interrupts on so we can service requests */
-       hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
+       h->access.set_intr_mask(h, CCISS_INTR_ON);
 
        /* Get the firmware version */
        inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
        if (inq_buff == NULL) {
-               printk(KERN_ERR "cciss: out of memory\n");
+               dev_err(&h->pdev->dev, "out of memory\n");
                goto clean4;
        }
 
-       return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
+       return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
                sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
        if (return_code == IO_OK) {
-               hba[i]->firm_ver[0] = inq_buff->data_byte[32];
-               hba[i]->firm_ver[1] = inq_buff->data_byte[33];
-               hba[i]->firm_ver[2] = inq_buff->data_byte[34];
-               hba[i]->firm_ver[3] = inq_buff->data_byte[35];
+               h->firm_ver[0] = inq_buff->data_byte[32];
+               h->firm_ver[1] = inq_buff->data_byte[33];
+               h->firm_ver[2] = inq_buff->data_byte[34];
+               h->firm_ver[3] = inq_buff->data_byte[35];
        } else {         /* send command failed */
-               printk(KERN_WARNING "cciss: unable to determine firmware"
+               dev_warn(&h->pdev->dev, "unable to determine firmware"
                        " version of controller\n");
        }
        kfree(inq_buff);
 
-       cciss_procinit(i);
+       cciss_procinit(h);
 
-       hba[i]->cciss_max_sectors = 8192;
+       h->cciss_max_sectors = 8192;
 
-       rebuild_lun_table(hba[i], 1, 0);
-       hba[i]->busy_initializing = 0;
+       rebuild_lun_table(h, 1, 0);
+       h->busy_initializing = 0;
        return 1;
 
 clean4:
-       kfree(hba[i]->cmd_pool_bits);
+       kfree(h->cmd_pool_bits);
        /* Free up sg elements */
-       for (k = 0; k < hba[i]->nr_cmds; k++)
-               kfree(hba[i]->scatter_list[k]);
-       kfree(hba[i]->scatter_list);
-       cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
-       if (hba[i]->cmd_pool)
-               pci_free_consistent(hba[i]->pdev,
-                                   hba[i]->nr_cmds * sizeof(CommandList_struct),
-                                   hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
-       if (hba[i]->errinfo_pool)
-               pci_free_consistent(hba[i]->pdev,
-                                   hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-                                   hba[i]->errinfo_pool,
-                                   hba[i]->errinfo_pool_dhandle);
-       free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
+       for (k = 0; k < h->nr_cmds; k++)
+               kfree(h->scatter_list[k]);
+       kfree(h->scatter_list);
+       cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+       if (h->cmd_pool)
+               pci_free_consistent(h->pdev,
+                                   h->nr_cmds * sizeof(CommandList_struct),
+                                   h->cmd_pool, h->cmd_pool_dhandle);
+       if (h->errinfo_pool)
+               pci_free_consistent(h->pdev,
+                                   h->nr_cmds * sizeof(ErrorInfo_struct),
+                                   h->errinfo_pool,
+                                   h->errinfo_pool_dhandle);
+       free_irq(h->intr[PERF_MODE_INT], h);
 clean2:
-       unregister_blkdev(hba[i]->major, hba[i]->devname);
+       unregister_blkdev(h->major, h->devname);
 clean1:
-       cciss_destroy_hba_sysfs_entry(hba[i]);
+       cciss_destroy_hba_sysfs_entry(h);
 clean0:
        pci_release_regions(pdev);
 clean_no_release_regions:
-       hba[i]->busy_initializing = 0;
+       h->busy_initializing = 0;
 
        /*
         * Deliberately omit pci_disable_device(): it does something nasty to
         * Smart Array controllers that pci_enable_device does not undo
         */
        pci_set_drvdata(pdev, NULL);
-       free_hba(i);
+       free_hba(h);
        return -1;
 }
 
@@ -4381,55 +4822,51 @@ static void cciss_shutdown(struct pci_dev *pdev)
        h = pci_get_drvdata(pdev);
        flush_buf = kzalloc(4, GFP_KERNEL);
        if (!flush_buf) {
-               printk(KERN_WARNING
-                       "cciss:%d cache not flushed, out of memory.\n",
-                       h->ctlr);
+               dev_warn(&h->pdev->dev, "cache not flushed, out of memory.\n");
                return;
        }
        /* write all data in the battery backed cache to disk */
        memset(flush_buf, 0, 4);
-       return_code = sendcmd_withirq(CCISS_CACHE_FLUSH, h->ctlr, flush_buf,
+       return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf,
                4, 0, CTLR_LUNID, TYPE_CMD);
        kfree(flush_buf);
        if (return_code != IO_OK)
-               printk(KERN_WARNING "cciss%d: Error flushing cache\n",
-                       h->ctlr);
+               dev_warn(&h->pdev->dev, "Error flushing cache\n");
        h->access.set_intr_mask(h, CCISS_INTR_OFF);
-       free_irq(h->intr[2], h);
+       free_irq(h->intr[PERF_MODE_INT], h);
 }
 
 static void __devexit cciss_remove_one(struct pci_dev *pdev)
 {
-       ctlr_info_t *tmp_ptr;
+       ctlr_info_t *h;
        int i, j;
 
        if (pci_get_drvdata(pdev) == NULL) {
-               printk(KERN_ERR "cciss: Unable to remove device \n");
+               dev_err(&pdev->dev, "Unable to remove device\n");
                return;
        }
 
-       tmp_ptr = pci_get_drvdata(pdev);
-       i = tmp_ptr->ctlr;
+       h = pci_get_drvdata(pdev);
+       i = h->ctlr;
        if (hba[i] == NULL) {
-               printk(KERN_ERR "cciss: device appears to "
-                      "already be removed \n");
+               dev_err(&pdev->dev, "device appears to already be removed\n");
                return;
        }
 
-       mutex_lock(&hba[i]->busy_shutting_down);
+       mutex_lock(&h->busy_shutting_down);
 
-       remove_from_scan_list(hba[i]);
-       remove_proc_entry(hba[i]->devname, proc_cciss);
-       unregister_blkdev(hba[i]->major, hba[i]->devname);
+       remove_from_scan_list(h);
+       remove_proc_entry(h->devname, proc_cciss);
+       unregister_blkdev(h->major, h->devname);
 
        /* remove it from the disk list */
        for (j = 0; j < CISS_MAX_LUN; j++) {
-               struct gendisk *disk = hba[i]->gendisk[j];
+               struct gendisk *disk = h->gendisk[j];
                if (disk) {
                        struct request_queue *q = disk->queue;
 
                        if (disk->flags & GENHD_FL_UP) {
-                               cciss_destroy_ld_sysfs_entry(hba[i], j, 1);
+                               cciss_destroy_ld_sysfs_entry(h, j, 1);
                                del_gendisk(disk);
                        }
                        if (q)
@@ -4438,39 +4875,41 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
        }
 
 #ifdef CONFIG_CISS_SCSI_TAPE
-       cciss_unregister_scsi(i);       /* unhook from SCSI subsystem */
+       cciss_unregister_scsi(h);       /* unhook from SCSI subsystem */
 #endif
 
        cciss_shutdown(pdev);
 
 #ifdef CONFIG_PCI_MSI
-       if (hba[i]->msix_vector)
-               pci_disable_msix(hba[i]->pdev);
-       else if (hba[i]->msi_vector)
-               pci_disable_msi(hba[i]->pdev);
+       if (h->msix_vector)
+               pci_disable_msix(h->pdev);
+       else if (h->msi_vector)
+               pci_disable_msi(h->pdev);
 #endif                         /* CONFIG_PCI_MSI */
 
-       iounmap(hba[i]->vaddr);
+       iounmap(h->transtable);
+       iounmap(h->cfgtable);
+       iounmap(h->vaddr);
 
-       pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
-                           hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
-       pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-                           hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
-       kfree(hba[i]->cmd_pool_bits);
+       pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct),
+                           h->cmd_pool, h->cmd_pool_dhandle);
+       pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct),
+                           h->errinfo_pool, h->errinfo_pool_dhandle);
+       kfree(h->cmd_pool_bits);
        /* Free up sg elements */
-       for (j = 0; j < hba[i]->nr_cmds; j++)
-               kfree(hba[i]->scatter_list[j]);
-       kfree(hba[i]->scatter_list);
-       cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
+       for (j = 0; j < h->nr_cmds; j++)
+               kfree(h->scatter_list[j]);
+       kfree(h->scatter_list);
+       cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
        /*
         * Deliberately omit pci_disable_device(): it does something nasty to
         * Smart Array controllers that pci_enable_device does not undo
         */
        pci_release_regions(pdev);
        pci_set_drvdata(pdev, NULL);
-       cciss_destroy_hba_sysfs_entry(hba[i]);
-       mutex_unlock(&hba[i]->busy_shutting_down);
-       free_hba(i);
+       cciss_destroy_hba_sysfs_entry(h);
+       mutex_unlock(&h->busy_shutting_down);
+       free_hba(h);
 }
 
 static struct pci_driver cciss_pci_driver = {
@@ -4495,7 +4934,6 @@ static int __init cciss_init(void)
         * array of them, the size must be a multiple of 8 bytes.
         */
        BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);
-
        printk(KERN_INFO DRIVER_NAME "\n");
 
        err = bus_register(&cciss_bus_type);
@@ -4532,8 +4970,8 @@ static void __exit cciss_cleanup(void)
        /* double check that all controller entrys have been removed */
        for (i = 0; i < MAX_CTLR; i++) {
                if (hba[i] != NULL) {
-                       printk(KERN_WARNING "cciss: had to remove"
-                              " controller %d\n", i);
+                       dev_warn(&hba[i]->pdev->dev,
+                               "had to remove controller\n");
                        cciss_remove_one(hba[i]->pdev);
                }
        }
@@ -4542,46 +4980,5 @@ static void __exit cciss_cleanup(void)
        bus_unregister(&cciss_bus_type);
 }
 
-static void fail_all_cmds(unsigned long ctlr)
-{
-       /* If we get here, the board is apparently dead. */
-       ctlr_info_t *h = hba[ctlr];
-       CommandList_struct *c;
-       unsigned long flags;
-
-       printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
-       h->alive = 0;           /* the controller apparently died... */
-
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-
-       pci_disable_device(h->pdev);    /* Make sure it is really dead. */
-
-       /* move everything off the request queue onto the completed queue */
-       while (!hlist_empty(&h->reqQ)) {
-               c = hlist_entry(h->reqQ.first, CommandList_struct, list);
-               removeQ(c);
-               h->Qdepth--;
-               addQ(&h->cmpQ, c);
-       }
-
-       /* Now, fail everything on the completed queue with a HW error */
-       while (!hlist_empty(&h->cmpQ)) {
-               c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
-               removeQ(c);
-               if (c->cmd_type != CMD_MSG_STALE)
-                       c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-               if (c->cmd_type == CMD_RWREQ) {
-                       complete_command(h, c, 0);
-               } else if (c->cmd_type == CMD_IOCTL_PEND)
-                       complete(c->waiting);
-#ifdef CONFIG_CISS_SCSI_TAPE
-               else if (c->cmd_type == CMD_SCSI)
-                       complete_scsi_command(c, 0, 0);
-#endif
-       }
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-       return;
-}
-
 module_init(cciss_init);
 module_exit(cciss_cleanup);
index c5d4111..ae340ff 100644 (file)
@@ -25,7 +25,7 @@ struct access_method {
        void (*submit_command)(ctlr_info_t *h, CommandList_struct *c);
        void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
        unsigned long (*fifo_full)(ctlr_info_t *h);
-       unsigned long (*intr_pending)(ctlr_info_t *h);
+       bool (*intr_pending)(ctlr_info_t *h);
        unsigned long (*command_completed)(ctlr_info_t *h);
 };
 typedef struct _drive_info_struct
@@ -85,8 +85,8 @@ struct ctlr_info
        int     max_cmd_sgentries;
        SGDescriptor_struct **cmd_sg_list;
 
-#      define DOORBELL_INT     0
-#      define PERF_MODE_INT    1
+#      define PERF_MODE_INT    0
+#      define DOORBELL_INT     1
 #      define SIMPLE_MODE_INT  2
 #      define MEMQ_MODE_INT    3
        unsigned int intr[4];
@@ -137,10 +137,27 @@ struct ctlr_info
        struct list_head scan_list;
        struct completion scan_wait;
        struct device dev;
+       /*
+        * Performant mode tables.
+        */
+       u32 trans_support;
+       u32 trans_offset;
+       struct TransTable_struct *transtable;
+       unsigned long transMethod;
+
+       /*
+        * Performant mode completion buffer
+        */
+       u64 *reply_pool;
+       dma_addr_t reply_pool_dhandle;
+       u64 *reply_pool_head;
+       size_t reply_pool_size;
+       unsigned char reply_pool_wraparound;
+       u32 *blockFetchTable;
 };
 
-/*  Defining the diffent access_menthods */
-/*
+/*  Defining the different access_methods
+ *
  * Memory mapped FIFO interface (SMART 53xx cards)
  */
 #define SA5_DOORBELL   0x20
@@ -159,19 +176,47 @@ struct ctlr_info
 #define SA5B_INTR_PENDING      0x04
 #define FIFO_EMPTY             0xffffffff      
 #define CCISS_FIRMWARE_READY   0xffff0000 /* value in scratchpad register */
+/* Perf. mode flags */
+#define SA5_PERF_INTR_PENDING  0x04
+#define SA5_PERF_INTR_OFF      0x05
+/* Outbound doorbell registers (offsets added to h->vaddr) and their bits */
+#define SA5_OUTDB_STATUS        0x9C
+#define SA5_OUTDB_STATUS_PERF_BIT      0x01
+#define SA5_OUTDB_CLEAR         0xA0
+#define SA5_OUTDB_CLEAR_PERF_BIT       0x01
+
 
 #define  CISS_ERROR_BIT                0x02
 
 #define CCISS_INTR_ON  1 
 #define CCISS_INTR_OFF 0
+
+
+/* CCISS_BOARD_READY_WAIT_SECS is how long to wait for a board
+ * to become ready, in seconds, before giving up on it.
+ * CCISS_BOARD_READY_POLL_INTERVAL_MSECS is how long to wait
+ * between polling the board to see if it is ready, in
+ * milliseconds.  CCISS_BOARD_READY_ITERATIONS is derived from
+ * the above.  CCISS_POST_RESET_* time the post-reset pause and no-op retries.
+ */
+#define CCISS_BOARD_READY_WAIT_SECS (120)
+#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
+#define CCISS_BOARD_READY_ITERATIONS \
+       ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
+               CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
+#define CCISS_POST_RESET_PAUSE_MSECS (3000)
+#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
+#define CCISS_POST_RESET_NOOP_RETRIES (12)
+
 /* 
        Send the command to the hardware 
 */
 static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c) 
 {
 #ifdef CCISS_DEBUG
-        printk("Sending %x - down to controller\n", c->busaddr );
-#endif /* CCISS_DEBUG */ 
+       printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n",
+                       h->ctlr, c->busaddr);
+#endif /* CCISS_DEBUG */
          writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
         h->commands_outstanding++;
         if ( h->commands_outstanding > h->max_outstanding)
@@ -214,6 +259,20 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
                         h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         }
 }
+
+/* Performant mode intr_mask: nonzero val unmasks interrupts, zero masks them */
+static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+       if (val) { /* turn on interrupts */
+               h->interrupts_enabled = 1;
+               writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+       } else { /* turn off interrupts */
+               h->interrupts_enabled = 0;
+               writel(SA5_PERF_INTR_OFF,
+                               h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+       }
+}
+
 /*
  *  Returns true if fifo is full.  
  * 
@@ -250,10 +309,44 @@ static unsigned long SA5_completed(ctlr_info_t *h)
        return ( register_value); 
 
 }
+
+/* Performant mode command completed: next reply_pool entry, or FIFO_EMPTY */
+static unsigned long SA5_performant_completed(ctlr_info_t *h)
+{
+       unsigned long register_value = FIFO_EMPTY;
+
+       /* flush the controller write of the reply queue by reading
+        * outbound doorbell status register.
+        */
+       register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+       /* msi auto clears the interrupt pending bit. */
+       if (!(h->msi_vector || h->msix_vector)) {
+               writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
+               /* Do a read in order to flush the write to the controller
+                * (as per spec.)
+                */
+               register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+       }
+
+       if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+               register_value = *(h->reply_pool_head);
+               (h->reply_pool_head)++;
+               h->commands_outstanding--;
+       } else {
+               register_value = FIFO_EMPTY; /* low bit mismatch: report empty */
+       }
+       /* Wrap head to the start of reply_pool after max_commands entries */
+       if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+               h->reply_pool_head = h->reply_pool;
+               h->reply_pool_wraparound ^= 1; /* expected low bit flips */
+       }
+
+       return register_value;
+}
 /*
  *     Returns true if an interrupt is pending.. 
  */
-static unsigned long SA5_intr_pending(ctlr_info_t *h)
+static bool SA5_intr_pending(ctlr_info_t *h)
 {
        unsigned long register_value  = 
                readl(h->vaddr + SA5_INTR_STATUS);
@@ -268,7 +361,7 @@ static unsigned long SA5_intr_pending(ctlr_info_t *h)
 /*
  *      Returns true if an interrupt is pending..
  */
-static unsigned long SA5B_intr_pending(ctlr_info_t *h)
+static bool SA5B_intr_pending(ctlr_info_t *h)
 {
         unsigned long register_value  =
                 readl(h->vaddr + SA5_INTR_STATUS);
@@ -280,6 +373,20 @@ static unsigned long SA5B_intr_pending(ctlr_info_t *h)
         return 0 ;
 }
 
+static bool SA5_performant_intr_pending(ctlr_info_t *h)
+{
+       unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
+
+       if (!register_value)
+               return false; /* interrupt status register reads zero */
+
+       if (h->msi_vector || h->msix_vector)
+               return true; /* with MSI/MSI-X a nonzero status suffices */
+
+       /* Otherwise read outbound doorbell to flush, then test the perf. bit */
+       register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+       return register_value & SA5_OUTDB_STATUS_PERF_BIT;
+}
 
 static struct access_method SA5_access = {
        SA5_submit_command,
@@ -297,6 +404,14 @@ static struct access_method SA5B_access = {
         SA5_completed,
 };
 
+static struct access_method SA5_performant_access = { /* performant mode ops */
+       .submit_command = SA5_submit_command,
+       .set_intr_mask = SA5_performant_intr_mask,
+       .fifo_full = SA5_fifo_full,
+       .intr_pending = SA5_performant_intr_pending,
+       .command_completed = SA5_performant_completed,
+};
+
 struct board_type {
        __u32   board_id;
        char    *product_name;
@@ -304,6 +419,4 @@ struct board_type {
        int nr_cmds; /* Max cmds this kind of ctlr can handle. */
 };
 
-#define CCISS_LOCK(i)  (&hba[i]->lock)
-
 #endif /* CCISS_H */
index e624ff9..eb060f1 100644 (file)
 /* Configuration Table */
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
+#define DOORBELL_CTLR_RESET     0x00000004l
 
 #define CFGTBL_Trans_Simple     0x00000002l
+#define CFGTBL_Trans_Performant 0x00000004l
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
@@ -173,12 +175,15 @@ typedef struct _SGDescriptor_struct {
  * PAD_64 can be adjusted independently as needed for 32-bit
  * and 64-bits systems.
  */
-#define COMMANDLIST_ALIGNMENT (8)
+#define COMMANDLIST_ALIGNMENT (32)
 #define IS_64_BIT ((sizeof(long) - 4)/4)
 #define IS_32_BIT (!IS_64_BIT)
 #define PAD_32 (0)
 #define PAD_64 (4)
 #define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
+#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 5
+
 typedef struct _CommandList_struct {
   CommandListHeader_struct Header;
   RequestBlock_struct      Request;
@@ -195,7 +200,7 @@ typedef struct _CommandList_struct {
   struct completion *waiting;
   int   retry_count;
   void * scsi_cmd;
-  char   pad[PADSIZE];
+  char pad[PADSIZE];
 } CommandList_struct;
 
 /* Configuration Table Structure */
@@ -209,12 +214,15 @@ typedef struct _HostWrite_struct {
 typedef struct _CfgTable_struct {
   BYTE             Signature[4];
   DWORD            SpecValence;
+#define SIMPLE_MODE    0x02
+#define PERFORMANT_MODE        0x04
+#define MEMQ_MODE      0x08
   DWORD            TransportSupport;
   DWORD            TransportActive;
   HostWrite_struct HostWrite;
   DWORD            CmdsOutMax;
   DWORD            BusTypes;
-  DWORD            Reserved; 
+  DWORD            TransMethodOffset;
   BYTE             ServerName[16];
   DWORD            HeartBeat;
   DWORD            SCSI_Prefetch;
@@ -222,6 +230,28 @@ typedef struct _CfgTable_struct {
   DWORD            MaxLogicalUnits;
   DWORD            MaxPhysicalDrives;
   DWORD            MaxPhysicalDrivesPerLogicalUnit;
+  DWORD            MaxPerformantModeCommands;
+  u8              reserved[0x78 - 0x58];
+  u32             misc_fw_support; /* offset 0x78 */
+#define MISC_FW_DOORBELL_RESET (0x02)
 } CfgTable_struct;
+
+struct TransTable_struct {
+  u32 BlockFetch0;
+  u32 BlockFetch1;
+  u32 BlockFetch2;
+  u32 BlockFetch3;
+  u32 BlockFetch4;
+  u32 BlockFetch5;
+  u32 BlockFetch6;
+  u32 BlockFetch7;
+  u32 RepQSize;
+  u32 RepQCount;
+  u32 RepQCtrAddrLow32;
+  u32 RepQCtrAddrHigh32;
+  u32 RepQAddr0Low32;
+  u32 RepQAddr0High32;
+};
+
 #pragma pack()  
 #endif /* CCISS_CMD_H */
index 72dae92..575495f 100644 (file)
 #define CCISS_ABORT_MSG 0x00
 #define CCISS_RESET_MSG 0x01
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
        size_t size,
        __u8 page_code, unsigned char *scsi3addr,
        int cmd_type);
 
-static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
+static CommandList_struct *cmd_alloc(ctlr_info_t *h);
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h);
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c);
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c);
 
 static int cciss_scsi_proc_info(
                struct Scsi_Host *sh,
@@ -93,8 +95,8 @@ static struct scsi_host_template cciss_driver_template = {
 
 #pragma pack(1)
 
-#define SCSI_PAD_32 0
-#define SCSI_PAD_64 0
+#define SCSI_PAD_32 8
+#define SCSI_PAD_64 8
 
 struct cciss_scsi_cmd_stack_elem_t {
        CommandList_struct cmd;
@@ -127,16 +129,16 @@ struct cciss_scsi_adapter_data_t {
        spinlock_t lock; // to protect ccissscsi[ctlr]; 
 };
 
-#define CPQ_TAPE_LOCK(ctlr, flags) spin_lock_irqsave( \
-       &hba[ctlr]->scsi_ctlr->lock, flags);
-#define CPQ_TAPE_UNLOCK(ctlr, flags) spin_unlock_irqrestore( \
-       &hba[ctlr]->scsi_ctlr->lock, flags);
+#define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
+       &h->scsi_ctlr->lock, flags);
+#define CPQ_TAPE_UNLOCK(h, flags) spin_unlock_irqrestore( \
+       &h->scsi_ctlr->lock, flags);
 
 static CommandList_struct *
 scsi_cmd_alloc(ctlr_info_t *h)
 {
        /* assume only one process in here at a time, locking done by caller. */
-       /* use CCISS_LOCK(ctlr) */
+       /* use h->lock */
        /* might be better to rewrite how we allocate scsi commands in a way that */
        /* needs no locking at all. */
 
@@ -177,10 +179,10 @@ scsi_cmd_alloc(ctlr_info_t *h)
 }
 
 static void 
-scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
+scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
        /* assume only one process in here at a time, locking done by caller. */
-       /* use CCISS_LOCK(ctlr) */
+       /* use h->lock */
        /* drop the free memory chunk on top of the stack. */
 
        struct cciss_scsi_adapter_data_t *sa;
@@ -190,22 +192,23 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
        stk = &sa->cmd_stack; 
        stk->top++;
        if (stk->top >= CMD_STACK_SIZE) {
-               printk("cciss: scsi_cmd_free called too many times.\n");
+               dev_err(&h->pdev->dev,
+                       "scsi_cmd_free called too many times.\n");
                BUG();
        }
-       stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
+       stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) c;
 }
 
 static int
-scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
+scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
 {
        int i;
        struct cciss_scsi_cmd_stack_t *stk;
        size_t size;
 
-       sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[ctlr],
-               hba[ctlr]->chainsize, CMD_STACK_SIZE);
-       if (!sa->cmd_sg_list && hba[ctlr]->chainsize > 0)
+       sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+               h->chainsize, CMD_STACK_SIZE);
+       if (!sa->cmd_sg_list && h->chainsize > 0)
                return -ENOMEM;
 
        stk = &sa->cmd_stack; 
@@ -215,7 +218,7 @@ scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
        BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
        /* pci_alloc_consistent guarantees 32-bit DMA address will be used */
        stk->pool = (struct cciss_scsi_cmd_stack_elem_t *)
-               pci_alloc_consistent(hba[ctlr]->pdev, size, &stk->cmd_pool_handle);
+               pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
 
        if (stk->pool == NULL) {
                cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
@@ -234,23 +237,22 @@ scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
 }
 
 static void
-scsi_cmd_stack_free(int ctlr)
+scsi_cmd_stack_free(ctlr_info_t *h)
 {
        struct cciss_scsi_adapter_data_t *sa;
        struct cciss_scsi_cmd_stack_t *stk;
        size_t size;
 
-       sa = hba[ctlr]->scsi_ctlr;
+       sa = h->scsi_ctlr;
        stk = &sa->cmd_stack; 
        if (stk->top != CMD_STACK_SIZE-1) {
-               printk( "cciss: %d scsi commands are still outstanding.\n",
+               dev_warn(&h->pdev->dev,
+                       "bug: %d scsi commands are still outstanding.\n",
                        CMD_STACK_SIZE - stk->top);
-               // BUG();
-               printk("WE HAVE A BUG HERE!!! stk=0x%p\n", stk);
        }
        size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
 
-       pci_free_consistent(hba[ctlr]->pdev, size, stk->pool, stk->cmd_pool_handle);
+       pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
        stk->pool = NULL;
        cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
 }
@@ -342,20 +344,20 @@ print_cmd(CommandList_struct *cp)
 #endif
 
 static int 
-find_bus_target_lun(int ctlr, int *bus, int *target, int *lun)
+find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 {
        /* finds an unused bus, target, lun for a new device */
-       /* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
+       /* assumes h->scsi_ctlr->lock is held */
        int i, found=0;
        unsigned char target_taken[CCISS_MAX_SCSI_DEVS_PER_HBA];
 
        memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
 
        target_taken[SELF_SCSI_ID] = 1; 
-       for (i=0;i<ccissscsi[ctlr].ndevices;i++)
-               target_taken[ccissscsi[ctlr].dev[i].target] = 1;
+       for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
+               target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
        
-       for (i=0;i<CCISS_MAX_SCSI_DEVS_PER_HBA;i++) {
+       for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
                if (!target_taken[i]) {
                        *bus = 0; *target=i; *lun = 0; found=1;
                        break;
@@ -369,19 +371,19 @@ struct scsi2map {
 };
 
 static int 
-cciss_scsi_add_entry(int ctlr, int hostno, 
+cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
                struct cciss_scsi_dev_t *device,
                struct scsi2map *added, int *nadded)
 {
-       /* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
-       int n = ccissscsi[ctlr].ndevices;
+       /* assumes h->scsi_ctlr->lock is held */
+       int n = ccissscsi[h->ctlr].ndevices;
        struct cciss_scsi_dev_t *sd;
        int i, bus, target, lun;
        unsigned char addr1[8], addr2[8];
 
        if (n >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
-               printk("cciss%d: Too many devices, "
-                       "some will be inaccessible.\n", ctlr);
+               dev_warn(&h->pdev->dev, "Too many devices, "
+                       "some will be inaccessible.\n");
                return -1;
        }
 
@@ -397,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno,
                memcpy(addr1, device->scsi3addr, 8);
                addr1[4] = 0;
                for (i = 0; i < n; i++) {
-                       sd = &ccissscsi[ctlr].dev[i];
+                       sd = &ccissscsi[h->ctlr].dev[i];
                        memcpy(addr2, sd->scsi3addr, 8);
                        addr2[4] = 0;
                        /* differ only in byte 4? */
@@ -410,9 +412,9 @@ cciss_scsi_add_entry(int ctlr, int hostno,
                }
        }
 
-       sd = &ccissscsi[ctlr].dev[n];
+       sd = &ccissscsi[h->ctlr].dev[n];
        if (lun == 0) {
-               if (find_bus_target_lun(ctlr,
+               if (find_bus_target_lun(h,
                        &sd->bus, &sd->target, &sd->lun) != 0)
                        return -1;
        } else {
@@ -431,37 +433,37 @@ cciss_scsi_add_entry(int ctlr, int hostno,
        memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
        sd->devtype = device->devtype;
 
-       ccissscsi[ctlr].ndevices++;
+       ccissscsi[h->ctlr].ndevices++;
 
        /* initially, (before registering with scsi layer) we don't 
           know our hostno and we don't want to print anything first 
           time anyway (the scsi layer's inquiries will show that info) */
        if (hostno != -1)
-               printk("cciss%d: %s device c%db%dt%dl%d added.\n", 
-                       ctlr, scsi_device_type(sd->devtype), hostno,
+               dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
+                       scsi_device_type(sd->devtype), hostno,
                        sd->bus, sd->target, sd->lun);
        return 0;
 }
 
 static void
-cciss_scsi_remove_entry(int ctlr, int hostno, int entry,
+cciss_scsi_remove_entry(ctlr_info_t *h, int hostno, int entry,
        struct scsi2map *removed, int *nremoved)
 {
-       /* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
+       /* assumes h->scsi_ctlr->lock is held */
        int i;
        struct cciss_scsi_dev_t sd;
 
        if (entry < 0 || entry >= CCISS_MAX_SCSI_DEVS_PER_HBA) return;
-       sd = ccissscsi[ctlr].dev[entry];
+       sd = ccissscsi[h->ctlr].dev[entry];
        removed[*nremoved].bus    = sd.bus;
        removed[*nremoved].target = sd.target;
        removed[*nremoved].lun    = sd.lun;
        (*nremoved)++;
-       for (i=entry;i<ccissscsi[ctlr].ndevices-1;i++)
-               ccissscsi[ctlr].dev[i] = ccissscsi[ctlr].dev[i+1];
-       ccissscsi[ctlr].ndevices--;
-       printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-               ctlr, scsi_device_type(sd.devtype), hostno,
+       for (i = entry; i < ccissscsi[h->ctlr].ndevices-1; i++)
+               ccissscsi[h->ctlr].dev[i] = ccissscsi[h->ctlr].dev[i+1];
+       ccissscsi[h->ctlr].ndevices--;
+       dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
+               scsi_device_type(sd.devtype), hostno,
                        sd.bus, sd.target, sd.lun);
 }
 
@@ -476,24 +478,24 @@ cciss_scsi_remove_entry(int ctlr, int hostno, int entry,
        (a)[1] == (b)[1] && \
        (a)[0] == (b)[0])
 
-static void fixup_botched_add(int ctlr, char *scsi3addr)
+static void fixup_botched_add(ctlr_info_t *h, char *scsi3addr)
 {
        /* called when scsi_add_device fails in order to re-adjust */
        /* ccissscsi[] to match the mid layer's view. */
        unsigned long flags;
        int i, j;
-       CPQ_TAPE_LOCK(ctlr, flags);
-       for (i = 0; i < ccissscsi[ctlr].ndevices; i++) {
+       CPQ_TAPE_LOCK(h, flags);
+       for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
                if (memcmp(scsi3addr,
-                               ccissscsi[ctlr].dev[i].scsi3addr, 8) == 0) {
-                       for (j = i; j < ccissscsi[ctlr].ndevices-1; j++)
-                               ccissscsi[ctlr].dev[j] =
-                                       ccissscsi[ctlr].dev[j+1];
-                       ccissscsi[ctlr].ndevices--;
+                               ccissscsi[h->ctlr].dev[i].scsi3addr, 8) == 0) {
+                       for (j = i; j < ccissscsi[h->ctlr].ndevices-1; j++)
+                               ccissscsi[h->ctlr].dev[j] =
+                                       ccissscsi[h->ctlr].dev[j+1];
+                       ccissscsi[h->ctlr].ndevices--;
                        break;
                }
        }
-       CPQ_TAPE_UNLOCK(ctlr, flags);
+       CPQ_TAPE_UNLOCK(h, flags);
 }
 
 static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
@@ -513,7 +515,7 @@ static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
 }
 
 static int
-adjust_cciss_scsi_table(int ctlr, int hostno,
+adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
        struct cciss_scsi_dev_t sd[], int nsds)
 {
        /* sd contains scsi3 addresses and devtypes, but
@@ -534,15 +536,15 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
                        GFP_KERNEL);
 
        if (!added || !removed) {
-               printk(KERN_WARNING "cciss%d: Out of memory in "
-                       "adjust_cciss_scsi_table\n", ctlr);
+               dev_warn(&h->pdev->dev,
+                       "Out of memory in adjust_cciss_scsi_table\n");
                goto free_and_out;
        }
 
-       CPQ_TAPE_LOCK(ctlr, flags);
+       CPQ_TAPE_LOCK(h, flags);
 
        if (hostno != -1)  /* if it's not the first time... */
-               sh = hba[ctlr]->scsi_ctlr->scsi_host;
+               sh = h->scsi_ctlr->scsi_host;
 
        /* find any devices in ccissscsi[] that are not in 
           sd[] and remove them from ccissscsi[] */
@@ -550,8 +552,8 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
        i = 0;
        nremoved = 0;
        nadded = 0;
-       while(i<ccissscsi[ctlr].ndevices) {
-               csd = &ccissscsi[ctlr].dev[i];
+       while (i < ccissscsi[h->ctlr].ndevices) {
+               csd = &ccissscsi[h->ctlr].dev[i];
                found=0;
                for (j=0;j<nsds;j++) {
                        if (SCSI3ADDR_EQ(sd[j].scsi3addr,
@@ -566,20 +568,18 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
 
                if (found == 0) { /* device no longer present. */ 
                        changes++;
-                       /* printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-                               ctlr, scsi_device_type(csd->devtype), hostno,
-                                       csd->bus, csd->target, csd->lun); */
-                       cciss_scsi_remove_entry(ctlr, hostno, i,
+                       cciss_scsi_remove_entry(h, hostno, i,
                                removed, &nremoved);
                        /* remove ^^^, hence i not incremented */
                } else if (found == 1) { /* device is different in some way */
                        changes++;
-                       printk("cciss%d: device c%db%dt%dl%d has changed.\n",
-                               ctlr, hostno, csd->bus, csd->target, csd->lun);
-                       cciss_scsi_remove_entry(ctlr, hostno, i,
+                       dev_info(&h->pdev->dev,
+                               "device c%db%dt%dl%d has changed.\n",
+                               hostno, csd->bus, csd->target, csd->lun);
+                       cciss_scsi_remove_entry(h, hostno, i,
                                removed, &nremoved);
                        /* remove ^^^, hence i not incremented */
-                       if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
+                       if (cciss_scsi_add_entry(h, hostno, &sd[j],
                                added, &nadded) != 0)
                                /* we just removed one, so add can't fail. */
                                        BUG();
@@ -601,8 +601,8 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
 
        for (i=0;i<nsds;i++) {
                found=0;
-               for (j=0;j<ccissscsi[ctlr].ndevices;j++) {
-                       csd = &ccissscsi[ctlr].dev[j];
+               for (j = 0; j < ccissscsi[h->ctlr].ndevices; j++) {
+                       csd = &ccissscsi[h->ctlr].dev[j];
                        if (SCSI3ADDR_EQ(sd[i].scsi3addr,
                                csd->scsi3addr)) {
                                if (device_is_the_same(&sd[i], csd))
@@ -614,18 +614,18 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
                }
                if (!found) {
                        changes++;
-                       if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
+                       if (cciss_scsi_add_entry(h, hostno, &sd[i],
                                added, &nadded) != 0)
                                break;
                } else if (found == 1) {
                        /* should never happen... */
                        changes++;
-                       printk(KERN_WARNING "cciss%d: device "
-                               "unexpectedly changed\n", ctlr);
+                       dev_warn(&h->pdev->dev,
+                               "device unexpectedly changed\n");
                        /* but if it does happen, we just ignore that device */
                }
        }
-       CPQ_TAPE_UNLOCK(ctlr, flags);
+       CPQ_TAPE_UNLOCK(h, flags);
 
        /* Don't notify scsi mid layer of any changes the first time through */
        /* (or if there are no changes) scsi_scan_host will do it later the */
@@ -645,9 +645,9 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
                        /* We don't expect to get here. */
                        /* future cmds to this device will get selection */
                        /* timeout as if the device was gone. */
-                       printk(KERN_WARNING "cciss%d: didn't find "
+                       dev_warn(&h->pdev->dev, "didn't find "
                                "c%db%dt%dl%d\n for removal.",
-                               ctlr, hostno, removed[i].bus,
+                               hostno, removed[i].bus,
                                removed[i].target, removed[i].lun);
                }
        }
@@ -659,13 +659,12 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
                        added[i].target, added[i].lun);
                if (rc == 0)
                        continue;
-               printk(KERN_WARNING "cciss%d: scsi_add_device "
+               dev_warn(&h->pdev->dev, "scsi_add_device "
                        "c%db%dt%dl%d failed, device not added.\n",
-                       ctlr, hostno,
-                       added[i].bus, added[i].target, added[i].lun);
+                       hostno, added[i].bus, added[i].target, added[i].lun);
                /* now we have to remove it from ccissscsi, */
                /* since it didn't get added to scsi mid layer */
-               fixup_botched_add(ctlr, added[i].scsi3addr);
+               fixup_botched_add(h, added[i].scsi3addr);
        }
 
 free_and_out:
@@ -675,33 +674,33 @@ free_and_out:
 }
 
 static int
-lookup_scsi3addr(int ctlr, int bus, int target, int lun, char *scsi3addr)
+lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
 {
        int i;
        struct cciss_scsi_dev_t *sd;
        unsigned long flags;
 
-       CPQ_TAPE_LOCK(ctlr, flags);
-       for (i=0;i<ccissscsi[ctlr].ndevices;i++) {
-               sd = &ccissscsi[ctlr].dev[i];
+       CPQ_TAPE_LOCK(h, flags);
+       for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+               sd = &ccissscsi[h->ctlr].dev[i];
                if (sd->bus == bus &&
                    sd->target == target &&
                    sd->lun == lun) {
                        memcpy(scsi3addr, &sd->scsi3addr[0], 8);
-                       CPQ_TAPE_UNLOCK(ctlr, flags);
+                       CPQ_TAPE_UNLOCK(h, flags);
                        return 0;
                }
        }
-       CPQ_TAPE_UNLOCK(ctlr, flags);
+       CPQ_TAPE_UNLOCK(h, flags);
        return -1;
 }
 
 static void 
-cciss_scsi_setup(int cntl_num)
+cciss_scsi_setup(ctlr_info_t *h)
 {
        struct cciss_scsi_adapter_data_t * shba;
 
-       ccissscsi[cntl_num].ndevices = 0;
+       ccissscsi[h->ctlr].ndevices = 0;
        shba = (struct cciss_scsi_adapter_data_t *)
                kmalloc(sizeof(*shba), GFP_KERNEL);     
        if (shba == NULL)
@@ -709,35 +708,35 @@ cciss_scsi_setup(int cntl_num)
        shba->scsi_host = NULL;
        spin_lock_init(&shba->lock);
        shba->registered = 0;
-       if (scsi_cmd_stack_setup(cntl_num, shba) != 0) {
+       if (scsi_cmd_stack_setup(h, shba) != 0) {
                kfree(shba);
                shba = NULL;
        }
-       hba[cntl_num]->scsi_ctlr = shba;
+       h->scsi_ctlr = shba;
        return;
 }
 
-static void
-complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
+static void complete_scsi_command(CommandList_struct *c, int timeout,
+       __u32 tag)
 {
        struct scsi_cmnd *cmd;
-       ctlr_info_t *ctlr;
+       ctlr_info_t *h;
        ErrorInfo_struct *ei;
 
-       ei = cp->err_info;
+       ei = c->err_info;
 
        /* First, see if it was a message rather than a command */
-       if (cp->Request.Type.Type == TYPE_MSG)  {
-               cp->cmd_type = CMD_MSG_DONE;
+       if (c->Request.Type.Type == TYPE_MSG)  {
+               c->cmd_type = CMD_MSG_DONE;
                return;
        }
 
-       cmd = (struct scsi_cmnd *) cp->scsi_cmd;        
-       ctlr = hba[cp->ctlr];
+       cmd = (struct scsi_cmnd *) c->scsi_cmd;
+       h = hba[c->ctlr];
 
        scsi_dma_unmap(cmd);
-       if (cp->Header.SGTotal > ctlr->max_cmd_sgentries)
-               cciss_unmap_sg_chain_block(ctlr, cp);
+       if (c->Header.SGTotal > h->max_cmd_sgentries)
+               cciss_unmap_sg_chain_block(h, c);
 
        cmd->result = (DID_OK << 16);           /* host byte */
        cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
@@ -764,9 +763,8 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
                                {
 #if 0
                                        printk(KERN_WARNING "cciss: cmd %p "
-                                       "has SCSI Status = %x\n",
-                                               cp,  
-                                               ei->ScsiStatus); 
+                                               "has SCSI Status = %x\n",
+                                               c, ei->ScsiStatus);
 #endif
                                        cmd->result |= (ei->ScsiStatus << 1);
                                }
@@ -786,13 +784,13 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
                        case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
                        break;
                        case CMD_DATA_OVERRUN:
-                               printk(KERN_WARNING "cciss: cp %p has"
+                               dev_warn(&h->pdev->dev, "%p has"
                                        " completed with data overrun "
-                                       "reported\n", cp);
+                                       "reported\n", c);
                        break;
                        case CMD_INVALID: {
-                               /* print_bytes(cp, sizeof(*cp), 1, 0);
-                               print_cmd(cp); */
+                               /* print_bytes(c, sizeof(*c), 1, 0);
+                               print_cmd(c); */
      /* We get CMD_INVALID if you address a non-existent tape drive instead
        of a selection timeout (no response).  You will see this if you yank 
        out a tape drive, then try to access it. This is kind of a shame
@@ -802,54 +800,50 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
                                }
                        break;
                        case CMD_PROTOCOL_ERR:
-                                printk(KERN_WARNING "cciss: cp %p has "
-                                       "protocol error \n", cp);
+                               dev_warn(&h->pdev->dev,
+                                       "%p has protocol error\n", c);
                         break;
                        case CMD_HARDWARE_ERR:
                                cmd->result = DID_ERROR << 16;
-                                printk(KERN_WARNING "cciss: cp %p had " 
-                                        " hardware error\n", cp);
+                               dev_warn(&h->pdev->dev,
+                                       "%p had hardware error\n", c);
                         break;
                        case CMD_CONNECTION_LOST:
                                cmd->result = DID_ERROR << 16;
-                               printk(KERN_WARNING "cciss: cp %p had "
-                                       "connection lost\n", cp);
+                               dev_warn(&h->pdev->dev,
+                                       "%p had connection lost\n", c);
                        break;
                        case CMD_ABORTED:
                                cmd->result = DID_ABORT << 16;
-                               printk(KERN_WARNING "cciss: cp %p was "
-                                       "aborted\n", cp);
+                               dev_warn(&h->pdev->dev, "%p was aborted\n", c);
                        break;
                        case CMD_ABORT_FAILED:
                                cmd->result = DID_ERROR << 16;
-                               printk(KERN_WARNING "cciss: cp %p reports "
-                                       "abort failed\n", cp);
+                               dev_warn(&h->pdev->dev,
+                                       "%p reports abort failed\n", c);
                        break;
                        case CMD_UNSOLICITED_ABORT:
                                cmd->result = DID_ABORT << 16;
-                               printk(KERN_WARNING "cciss: cp %p aborted "
-                                       "do to an unsolicited abort\n", cp);
+                               dev_warn(&h->pdev->dev, "%p aborted do to an "
+                                       "unsolicited abort\n", c);
                        break;
                        case CMD_TIMEOUT:
                                cmd->result = DID_TIME_OUT << 16;
-                               printk(KERN_WARNING "cciss: cp %p timedout\n",
-                                       cp);
+                               dev_warn(&h->pdev->dev, "%p timedout\n", c);
                        break;
                        default:
                                cmd->result = DID_ERROR << 16;
-                               printk(KERN_WARNING "cciss: cp %p returned "
-                                       "unknown status %x\n", cp, 
+                               dev_warn(&h->pdev->dev,
+                                       "%p returned unknown status %x\n", c,
                                                ei->CommandStatus); 
                }
        }
-       // printk("c:%p:c%db%dt%dl%d ", cmd, ctlr->ctlr, cmd->channel, 
-       //      cmd->target, cmd->lun);
        cmd->scsi_done(cmd);
-       scsi_cmd_free(ctlr, cp);
+       scsi_cmd_free(h, c);
 }
 
 static int
-cciss_scsi_detect(int ctlr)
+cciss_scsi_detect(ctlr_info_t *h)
 {
        struct Scsi_Host *sh;
        int error;
@@ -860,15 +854,15 @@ cciss_scsi_detect(int ctlr)
        sh->io_port = 0;        // good enough?  FIXME, 
        sh->n_io_port = 0;      // I don't think we use these two...
        sh->this_id = SELF_SCSI_ID;  
-       sh->sg_tablesize = hba[ctlr]->maxsgentries;
+       sh->sg_tablesize = h->maxsgentries;
        sh->max_cmd_len = MAX_COMMAND_SIZE;
 
        ((struct cciss_scsi_adapter_data_t *) 
-               hba[ctlr]->scsi_ctlr)->scsi_host = sh;
-       sh->hostdata[0] = (unsigned long) hba[ctlr];
-       sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT];
+               h->scsi_ctlr)->scsi_host = sh;
+       sh->hostdata[0] = (unsigned long) h;
+       sh->irq = h->intr[SIMPLE_MODE_INT];
        sh->unique_id = sh->irq;
-       error = scsi_add_host(sh, &hba[ctlr]->pdev->dev);
+       error = scsi_add_host(sh, &h->pdev->dev);
        if (error)
                goto fail_host_put;
        scsi_scan_host(sh);
@@ -882,20 +876,20 @@ cciss_scsi_detect(int ctlr)
 
 static void
 cciss_unmap_one(struct pci_dev *pdev,
-               CommandList_struct *cp,
+               CommandList_struct *c,
                size_t buflen,
                int data_direction)
 {
        u64bit addr64;
 
-       addr64.val32.lower = cp->SG[0].Addr.lower;
-       addr64.val32.upper = cp->SG[0].Addr.upper;
+       addr64.val32.lower = c->SG[0].Addr.lower;
+       addr64.val32.upper = c->SG[0].Addr.upper;
        pci_unmap_single(pdev, (dma_addr_t) addr64.val, buflen, data_direction);
 }
 
 static void
 cciss_map_one(struct pci_dev *pdev,
-               CommandList_struct *cp,
+               CommandList_struct *c,
                unsigned char *buf,
                size_t buflen,
                int data_direction)
@@ -903,164 +897,149 @@ cciss_map_one(struct pci_dev *pdev,
        __u64 addr64;
 
        addr64 = (__u64) pci_map_single(pdev, buf, buflen, data_direction);
-       cp->SG[0].Addr.lower = 
+       c->SG[0].Addr.lower =
          (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
-       cp->SG[0].Addr.upper =
+       c->SG[0].Addr.upper =
          (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
-       cp->SG[0].Len = buflen;
-       cp->Header.SGList = (__u8) 1;   /* no. SGs contig in this cmd */
-       cp->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
+       c->SG[0].Len = buflen;
+       c->Header.SGList = (__u8) 1;   /* no. SGs contig in this cmd */
+       c->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
 }
 
 static int
-cciss_scsi_do_simple_cmd(ctlr_info_t *c,
-                       CommandList_struct *cp,
+cciss_scsi_do_simple_cmd(ctlr_info_t *h,
+                       CommandList_struct *c,
                        unsigned char *scsi3addr, 
                        unsigned char *cdb,
                        unsigned char cdblen,
                        unsigned char *buf, int bufsize,
                        int direction)
 {
-       unsigned long flags;
        DECLARE_COMPLETION_ONSTACK(wait);
 
-       cp->cmd_type = CMD_IOCTL_PEND;          // treat this like an ioctl 
-       cp->scsi_cmd = NULL;
-       cp->Header.ReplyQueue = 0;  // unused in simple mode
-       memcpy(&cp->Header.LUN, scsi3addr, sizeof(cp->Header.LUN));
-       cp->Header.Tag.lower = cp->busaddr;  // Use k. address of cmd as tag
+       c->cmd_type = CMD_IOCTL_PEND; /* treat this like an ioctl */
+       c->scsi_cmd = NULL;
+       c->Header.ReplyQueue = 0;  /* unused in simple mode */
+       memcpy(&c->Header.LUN, scsi3addr, sizeof(c->Header.LUN));
+       c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
        // Fill in the request block...
 
        /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", 
                scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
                scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
 
-       memset(cp->Request.CDB, 0, sizeof(cp->Request.CDB));
-       memcpy(cp->Request.CDB, cdb, cdblen);
-       cp->Request.Timeout = 0;
-       cp->Request.CDBLen = cdblen;
-       cp->Request.Type.Type = TYPE_CMD;
-       cp->Request.Type.Attribute = ATTR_SIMPLE;
-       cp->Request.Type.Direction = direction;
+       memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+       memcpy(c->Request.CDB, cdb, cdblen);
+       c->Request.Timeout = 0;
+       c->Request.CDBLen = cdblen;
+       c->Request.Type.Type = TYPE_CMD;
+       c->Request.Type.Attribute = ATTR_SIMPLE;
+       c->Request.Type.Direction = direction;
 
        /* Fill in the SG list and do dma mapping */
-       cciss_map_one(c->pdev, cp, (unsigned char *) buf,
+       cciss_map_one(h->pdev, c, (unsigned char *) buf,
                        bufsize, DMA_FROM_DEVICE); 
 
-       cp->waiting = &wait;
-
-       /* Put the request on the tail of the request queue */
-       spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-       addQ(&c->reqQ, cp);
-       c->Qdepth++;
-       start_io(c);
-       spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
-
+       c->waiting = &wait;
+       enqueue_cmd_and_start_io(h, c);
        wait_for_completion(&wait);
 
        /* undo the dma mapping */
-       cciss_unmap_one(c->pdev, cp, bufsize, DMA_FROM_DEVICE);
+       cciss_unmap_one(h->pdev, c, bufsize, DMA_FROM_DEVICE);
        return(0);
 }
 
 static void 
-cciss_scsi_interpret_error(CommandList_struct *cp)
+cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
 {
        ErrorInfo_struct *ei;
 
-       ei = cp->err_info; 
+       ei = c->err_info;
        switch(ei->CommandStatus)
        {
                case CMD_TARGET_STATUS:
-                       printk(KERN_WARNING "cciss: cmd %p has "
-                               "completed with errors\n", cp);
-                       printk(KERN_WARNING "cciss: cmd %p "
-                               "has SCSI Status = %x\n",
-                                       cp,  
-                                       ei->ScsiStatus);
+                       dev_warn(&h->pdev->dev,
+                               "cmd %p has completed with errors\n", c);
+                       dev_warn(&h->pdev->dev,
+                               "cmd %p has SCSI Status = %x\n",
+                               c, ei->ScsiStatus);
                        if (ei->ScsiStatus == 0)
-                               printk(KERN_WARNING 
-                               "cciss:SCSI status is abnormally zero.  "
+                               dev_warn(&h->pdev->dev,
+                               "SCSI status is abnormally zero.  "
                                "(probably indicates selection timeout "
                                "reported incorrectly due to a known "
                                "firmware bug, circa July, 2001.)\n");
                break;
                case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
-                       printk("UNDERRUN\n");
+                       dev_info(&h->pdev->dev, "UNDERRUN\n");
                break;
                case CMD_DATA_OVERRUN:
-                       printk(KERN_WARNING "cciss: cp %p has"
+                       dev_warn(&h->pdev->dev, "%p has"
                                " completed with data overrun "
-                               "reported\n", cp);
+                               "reported\n", c);
                break;
                case CMD_INVALID: {
                        /* controller unfortunately reports SCSI passthru's */
                        /* to non-existent targets as invalid commands. */
-                       printk(KERN_WARNING "cciss: cp %p is "
-                               "reported invalid (probably means "
-                               "target device no longer present)\n", 
-                               cp); 
-                       /* print_bytes((unsigned char *) cp, sizeof(*cp), 1, 0);
-                       print_cmd(cp);  */
+                       dev_warn(&h->pdev->dev,
+                               "%p is reported invalid (probably means "
+                               "target device no longer present)\n", c);
+                       /* print_bytes((unsigned char *) c, sizeof(*c), 1, 0);
+                       print_cmd(c);  */
                        }
                break;
                case CMD_PROTOCOL_ERR:
-                       printk(KERN_WARNING "cciss: cp %p has "
-                               "protocol error \n", cp);
+                       dev_warn(&h->pdev->dev, "%p has protocol error\n", c);
                break;
                case CMD_HARDWARE_ERR:
                        /* cmd->result = DID_ERROR << 16; */
-                       printk(KERN_WARNING "cciss: cp %p had " 
-                               " hardware error\n", cp);
+                       dev_warn(&h->pdev->dev, "%p had hardware error\n", c);
                break;
                case CMD_CONNECTION_LOST:
-                       printk(KERN_WARNING "cciss: cp %p had "
-                               "connection lost\n", cp);
+                       dev_warn(&h->pdev->dev, "%p had connection lost\n", c);
                break;
                case CMD_ABORTED:
-                       printk(KERN_WARNING "cciss: cp %p was "
-                               "aborted\n", cp);
+                       dev_warn(&h->pdev->dev, "%p was aborted\n", c);
                break;
                case CMD_ABORT_FAILED:
-                       printk(KERN_WARNING "cciss: cp %p reports "
-                               "abort failed\n", cp);
+                       dev_warn(&h->pdev->dev,
+                               "%p reports abort failed\n", c);
                break;
                case CMD_UNSOLICITED_ABORT:
-                       printk(KERN_WARNING "cciss: cp %p aborted "
-                               "do to an unsolicited abort\n", cp);
+                       dev_warn(&h->pdev->dev,
+                               "%p aborted do to an unsolicited abort\n", c);
                break;
                case CMD_TIMEOUT:
-                       printk(KERN_WARNING "cciss: cp %p timedout\n",
-                               cp);
+                       dev_warn(&h->pdev->dev, "%p timedout\n", c);
                break;
                default:
-                       printk(KERN_WARNING "cciss: cp %p returned "
-                               "unknown status %x\n", cp, 
-                                       ei->CommandStatus); 
+                       dev_warn(&h->pdev->dev,
+                               "%p returned unknown status %x\n",
+                               c, ei->CommandStatus);
        }
 }
 
 static int
-cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 
+cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
        unsigned char page, unsigned char *buf,
        unsigned char bufsize)
 {
        int rc;
-       CommandList_struct *cp;
+       CommandList_struct *c;
        char cdb[6];
        ErrorInfo_struct *ei;
        unsigned long flags;
 
-       spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-       cp = scsi_cmd_alloc(c);
-       spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
+       c = scsi_cmd_alloc(h);
+       spin_unlock_irqrestore(&h->lock, flags);
 
-       if (cp == NULL) {                       /* trouble... */
+       if (c == NULL) {                        /* trouble... */
                printk("cmd_alloc returned NULL!\n");
                return -1;
        }
 
-       ei = cp->err_info; 
+       ei = c->err_info;
 
        cdb[0] = CISS_INQUIRY;
        cdb[1] = (page != 0);
@@ -1068,24 +1047,24 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
        cdb[3] = 0;
        cdb[4] = bufsize;
        cdb[5] = 0;
-       rc = cciss_scsi_do_simple_cmd(c, cp, scsi3addr, cdb, 
+       rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, cdb,
                                6, buf, bufsize, XFER_READ);
 
        if (rc != 0) return rc; /* something went wrong */
 
        if (ei->CommandStatus != 0 && 
            ei->CommandStatus != CMD_DATA_UNDERRUN) {
-               cciss_scsi_interpret_error(cp);
+               cciss_scsi_interpret_error(h, c);
                rc = -1;
        }
-       spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-       scsi_cmd_free(c, cp);
-       spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
+       scsi_cmd_free(h, c);
+       spin_unlock_irqrestore(&h->lock, flags);
        return rc;      
 }
 
 /* Get the device id from inquiry page 0x83 */
-static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
+static int cciss_scsi_get_device_id(ctlr_info_t *h, unsigned char *scsi3addr,
        unsigned char *device_id, int buflen)
 {
        int rc;
@@ -1096,7 +1075,7 @@ static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
        buf = kzalloc(64, GFP_KERNEL);
        if (!buf)
                return -1;
-       rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
+       rc = cciss_scsi_do_inquiry(h, scsi3addr, 0x83, buf, 64);
        if (rc == 0)
                memcpy(device_id, &buf[8], buflen);
        kfree(buf);
@@ -1104,20 +1083,20 @@ static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
 }
 
 static int
-cciss_scsi_do_report_phys_luns(ctlr_info_t *c, 
+cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
                ReportLunData_struct *buf, int bufsize)
 {
        int rc;
-       CommandList_struct *cp;
+       CommandList_struct *c;
        unsigned char cdb[12];
        unsigned char scsi3addr[8]; 
        ErrorInfo_struct *ei;
        unsigned long flags;
 
-       spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-       cp = scsi_cmd_alloc(c);
-       spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
-       if (cp == NULL) {                       /* trouble... */
+       spin_lock_irqsave(&h->lock, flags);
+       c = scsi_cmd_alloc(h);
+       spin_unlock_irqrestore(&h->lock, flags);
+       if (c == NULL) {                        /* trouble... */
                printk("cmd_alloc returned NULL!\n");
                return -1;
        }
@@ -1136,27 +1115,27 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *c,
        cdb[10] = 0;
        cdb[11] = 0;
 
-       rc = cciss_scsi_do_simple_cmd(c, cp, scsi3addr, 
+       rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
                                cdb, 12, 
                                (unsigned char *) buf, 
                                bufsize, XFER_READ);
 
        if (rc != 0) return rc; /* something went wrong */
 
-       ei = cp->err_info; 
+       ei = c->err_info;
        if (ei->CommandStatus != 0 && 
            ei->CommandStatus != CMD_DATA_UNDERRUN) {
-               cciss_scsi_interpret_error(cp);
+               cciss_scsi_interpret_error(h, c);
                rc = -1;
        }
-       spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-       scsi_cmd_free(c, cp);
-       spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+       spin_lock_irqsave(&h->lock, flags);
+       scsi_cmd_free(h, c);
+       spin_unlock_irqrestore(&h->lock, flags);
        return rc;      
 }
 
 static void
-cciss_update_non_disk_devices(int cntl_num, int hostno)
+cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 {
        /* the idea here is we could get notified from /proc
           that some devices have changed, so we do a report 
@@ -1189,7 +1168,6 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
        ReportLunData_struct *ld_buff;
        unsigned char *inq_buff;
        unsigned char scsi3addr[8];
-       ctlr_info_t *c;
        __u32 num_luns=0;
        unsigned char *ch;
        struct cciss_scsi_dev_t *currentsd, *this_device;
@@ -1197,7 +1175,6 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
        int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
        int i;
 
-       c = (ctlr_info_t *) hba[cntl_num];      
        ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
        inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
        currentsd = kzalloc(sizeof(*currentsd) *
@@ -1207,7 +1184,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
                goto out;
        }
        this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
-       if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
+       if (cciss_scsi_do_report_phys_luns(h, ld_buff, reportlunsize) == 0) {
                ch = &ld_buff->LUNListLength[0];
                num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
                if (num_luns > CISS_MAX_PHYS_LUN) {
@@ -1231,7 +1208,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
                memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
                memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
 
-               if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
+               if (cciss_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
                        (unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
                        /* Inquiry failed (msg printed already) */
                        continue; /* so we will skip this device. */
@@ -1249,7 +1226,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
                        sizeof(this_device->revision));
                memset(this_device->device_id, 0,
                        sizeof(this_device->device_id));
-               cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
+               cciss_scsi_get_device_id(h, scsi3addr,
                        this_device->device_id, sizeof(this_device->device_id));
 
                switch (this_device->devtype)
@@ -1276,7 +1253,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
                  case 0x08: /* medium changer */
                        if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
                                printk(KERN_INFO "cciss%d: %s ignored, "
-                                       "too many devices.\n", cntl_num,
+                                       "too many devices.\n", h->ctlr,
                                        scsi_device_type(this_device->devtype));
                                break;
                        }
@@ -1288,7 +1265,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
                }
        }
 
-       adjust_cciss_scsi_table(cntl_num, hostno, currentsd, ncurrent);
+       adjust_cciss_scsi_table(h, hostno, currentsd, ncurrent);
 out:
        kfree(inq_buff);
        kfree(ld_buff);
@@ -1307,12 +1284,12 @@ is_keyword(char *ptr, int len, char *verb)  // Thanks to ncr53c8xx.c
 }
 
 static int
-cciss_scsi_user_command(int ctlr, int hostno, char *buffer, int length)
+cciss_scsi_user_command(ctlr_info_t *h, int hostno, char *buffer, int length)
 {
        int arg_len;
 
        if ((arg_len = is_keyword(buffer, length, "rescan")) != 0)
-               cciss_update_non_disk_devices(ctlr, hostno);
+               cciss_update_non_disk_devices(h, hostno);
        else
                return -EINVAL;
        return length;
@@ -1329,20 +1306,16 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,
 {
 
        int buflen, datalen;
-       ctlr_info_t *ci;
+       ctlr_info_t *h;
        int i;
-       int cntl_num;
-
 
-       ci = (ctlr_info_t *) sh->hostdata[0];
-       if (ci == NULL)  /* This really shouldn't ever happen. */
+       h = (ctlr_info_t *) sh->hostdata[0];
+       if (h == NULL)  /* This really shouldn't ever happen. */
                return -EINVAL;
 
-       cntl_num = ci->ctlr;    /* Get our index into the hba[] array */
-
        if (func == 0) {        /* User is reading from /proc/scsi/ciss*?/?*  */
                buflen = sprintf(buffer, "cciss%d: SCSI host: %d\n",
-                               cntl_num, sh->host_no);
+                               h->ctlr, sh->host_no);
 
                /* this information is needed by apps to know which cciss
                   device corresponds to which scsi host number without
@@ -1352,8 +1325,9 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,
                   this info is for an app to be able to use to know how to
                   get them back in sync. */
 
-               for (i=0;i<ccissscsi[cntl_num].ndevices;i++) {
-                       struct cciss_scsi_dev_t *sd = &ccissscsi[cntl_num].dev[i];
+               for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+                       struct cciss_scsi_dev_t *sd =
+                               &ccissscsi[h->ctlr].dev[i];
                        buflen += sprintf(&buffer[buflen], "c%db%dt%dl%d %02d "
                                "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                                sh->host_no, sd->bus, sd->target, sd->lun,
@@ -1371,15 +1345,15 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,
                        *start = buffer + offset;
                return(datalen);
        } else  /* User is writing to /proc/scsi/cciss*?/?*  ... */
-               return cciss_scsi_user_command(cntl_num, sh->host_no,
+               return cciss_scsi_user_command(h, sh->host_no,
                        buffer, length);        
 } 
 
 /* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci 
    dma mapping  and fills in the scatter gather entries of the 
-   cciss command, cp. */
+   cciss command, c. */
 
-static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
+static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
        struct scsi_cmnd *cmd)
 {
        unsigned int len;
@@ -1393,7 +1367,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
 
        chained = 0;
        sg_index = 0;
-       curr_sg = cp->SG;
+       curr_sg = c->SG;
        request_nsgs = scsi_dma_map(cmd);
        if (request_nsgs) {
                scsi_for_each_sg(cmd, sg, request_nsgs, i) {
@@ -1401,7 +1375,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
                                !chained && request_nsgs - i > 1) {
                                chained = 1;
                                sg_index = 0;
-                               curr_sg = sa->cmd_sg_list[cp->cmdindex];
+                               curr_sg = sa->cmd_sg_list[c->cmdindex];
                        }
                        addr64 = (__u64) sg_dma_address(sg);
                        len  = sg_dma_len(sg);
@@ -1414,19 +1388,19 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
                        ++sg_index;
                }
                if (chained)
-                       cciss_map_sg_chain_block(h, cp,
-                               sa->cmd_sg_list[cp->cmdindex],
+                       cciss_map_sg_chain_block(h, c,
+                               sa->cmd_sg_list[c->cmdindex],
                                (request_nsgs - (h->max_cmd_sgentries - 1)) *
                                        sizeof(SGDescriptor_struct));
        }
        /* track how many SG entries we are using */
        if (request_nsgs > h->maxSG)
                h->maxSG = request_nsgs;
-       cp->Header.SGTotal = (__u8) request_nsgs + chained;
+       c->Header.SGTotal = (__u8) request_nsgs + chained;
        if (request_nsgs > h->max_cmd_sgentries)
-               cp->Header.SGList = h->max_cmd_sgentries;
+               c->Header.SGList = h->max_cmd_sgentries;
        else
-               cp->Header.SGList = cp->Header.SGTotal;
+               c->Header.SGList = c->Header.SGTotal;
        return;
 }
 
@@ -1434,18 +1408,17 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
 static int
 cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
 {
-       ctlr_info_t *c;
-       int ctlr, rc;
+       ctlr_info_t *h;
+       int rc;
        unsigned char scsi3addr[8];
-       CommandList_struct *cp;
+       CommandList_struct *c;
        unsigned long flags;
 
        // Get the ptr to our adapter structure (hba[i]) out of cmd->host.
        // We violate cmd->host privacy here.  (Is there another way?)
-       c = (ctlr_info_t *) cmd->device->host->hostdata[0];
-       ctlr = c->ctlr;
+       h = (ctlr_info_t *) cmd->device->host->hostdata[0];
 
-       rc = lookup_scsi3addr(ctlr, cmd->device->channel, cmd->device->id, 
+       rc = lookup_scsi3addr(h, cmd->device->channel, cmd->device->id,
                        cmd->device->lun, scsi3addr);
        if (rc != 0) {
                /* the scsi nexus does not match any that we presented... */
@@ -1457,19 +1430,14 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
                return 0;
        }
 
-       /* printk("cciss_queue_command, p=%p, cmd=0x%02x, c%db%dt%dl%d\n", 
-               cmd, cmd->cmnd[0], ctlr, cmd->channel, cmd->target, cmd->lun);*/
-       // printk("q:%p:c%db%dt%dl%d ", cmd, ctlr, cmd->channel, 
-       //      cmd->target, cmd->lun);
-
        /* Ok, we have a reasonable scsi nexus, so send the cmd down, and
            see what the device thinks of it. */
 
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-       cp = scsi_cmd_alloc(c);
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-       if (cp == NULL) {                       /* trouble... */
-               printk("scsi_cmd_alloc returned NULL!\n");
+       spin_lock_irqsave(&h->lock, flags);
+       c = scsi_cmd_alloc(h);
+       spin_unlock_irqrestore(&h->lock, flags);
+       if (c == NULL) {                        /* trouble... */
+               dev_warn(&h->pdev->dev, "scsi_cmd_alloc returned NULL!\n");
                /* FIXME: next 3 lines are -> BAD! <- */
                cmd->result = DID_NO_CONNECT << 16;
                done(cmd);
@@ -1480,35 +1448,41 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
 
        cmd->scsi_done = done;    // save this for use by completion code 
 
-       // save cp in case we have to abort it 
-       cmd->host_scribble = (unsigned char *) cp; 
+       /* save c in case we have to abort it */
+       cmd->host_scribble = (unsigned char *) c;
 
-       cp->cmd_type = CMD_SCSI;
-       cp->scsi_cmd = cmd;
-       cp->Header.ReplyQueue = 0;  // unused in simple mode
-       memcpy(&cp->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
-       cp->Header.Tag.lower = cp->busaddr;  // Use k. address of cmd as tag
+       c->cmd_type = CMD_SCSI;
+       c->scsi_cmd = cmd;
+       c->Header.ReplyQueue = 0;  /* unused in simple mode */
+       memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
+       c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
        
        // Fill in the request block...
 
-       cp->Request.Timeout = 0;
-       memset(cp->Request.CDB, 0, sizeof(cp->Request.CDB));
-       BUG_ON(cmd->cmd_len > sizeof(cp->Request.CDB));
-       cp->Request.CDBLen = cmd->cmd_len;
-       memcpy(cp->Request.CDB, cmd->cmnd, cmd->cmd_len);
-       cp->Request.Type.Type = TYPE_CMD;
-       cp->Request.Type.Attribute = ATTR_SIMPLE;
+       c->Request.Timeout = 0;
+       memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+       BUG_ON(cmd->cmd_len > sizeof(c->Request.CDB));
+       c->Request.CDBLen = cmd->cmd_len;
+       memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
+       c->Request.Type.Type = TYPE_CMD;
+       c->Request.Type.Attribute = ATTR_SIMPLE;
        switch(cmd->sc_data_direction)
        {
-         case DMA_TO_DEVICE: cp->Request.Type.Direction = XFER_WRITE; break;
-         case DMA_FROM_DEVICE: cp->Request.Type.Direction = XFER_READ; break;
-         case DMA_NONE: cp->Request.Type.Direction = XFER_NONE; break;
+         case DMA_TO_DEVICE:
+               c->Request.Type.Direction = XFER_WRITE;
+               break;
+         case DMA_FROM_DEVICE:
+               c->Request.Type.Direction = XFER_READ;
+               break;
+         case DMA_NONE:
+               c->Request.Type.Direction = XFER_NONE;
+               break;
          case DMA_BIDIRECTIONAL:
                // This can happen if a buggy application does a scsi passthru
                // and sets both inlen and outlen to non-zero. ( see
                // ../scsi/scsi_ioctl.c:scsi_ioctl_send_command() )
 
-               cp->Request.Type.Direction = XFER_RSVD;
+               c->Request.Type.Direction = XFER_RSVD;
                // This is technically wrong, and cciss controllers should
                // reject it with CMD_INVALID, which is the most correct 
                // response, but non-fibre backends appear to let it 
@@ -1519,27 +1493,18 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
                break;
 
          default: 
-               printk("cciss: unknown data direction: %d\n", 
+               dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
                        cmd->sc_data_direction);
                BUG();
                break;
        }
-       cciss_scatter_gather(c, cp, cmd);
-
-       /* Put the request on the tail of the request queue */
-
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-       addQ(&c->reqQ, cp);
-       c->Qdepth++;
-       start_io(c);
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-
+       cciss_scatter_gather(h, c, cmd);
+       enqueue_cmd_and_start_io(h, c);
        /* the cmd'll come back via intr handler in complete_scsi_command()  */
        return 0;
 }
 
-static void 
-cciss_unregister_scsi(int ctlr)
+static void cciss_unregister_scsi(ctlr_info_t *h)
 {
        struct cciss_scsi_adapter_data_t *sa;
        struct cciss_scsi_cmd_stack_t *stk;
@@ -1547,59 +1512,58 @@ cciss_unregister_scsi(int ctlr)
 
        /* we are being forcibly unloaded, and may not refuse. */
 
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-       sa = hba[ctlr]->scsi_ctlr;
+       spin_lock_irqsave(&h->lock, flags);
+       sa = h->scsi_ctlr;
        stk = &sa->cmd_stack; 
 
        /* if we weren't ever actually registered, don't unregister */ 
        if (sa->registered) {
-               spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+               spin_unlock_irqrestore(&h->lock, flags);
                scsi_remove_host(sa->scsi_host);
                scsi_host_put(sa->scsi_host);
-               spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+               spin_lock_irqsave(&h->lock, flags);
        }
 
        /* set scsi_host to NULL so our detect routine will 
           find us on register */
        sa->scsi_host = NULL;
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-       scsi_cmd_stack_free(ctlr);
+       spin_unlock_irqrestore(&h->lock, flags);
+       scsi_cmd_stack_free(h);
        kfree(sa);
 }
 
-static int 
-cciss_engage_scsi(int ctlr)
+static int cciss_engage_scsi(ctlr_info_t *h)
 {
        struct cciss_scsi_adapter_data_t *sa;
        struct cciss_scsi_cmd_stack_t *stk;
        unsigned long flags;
 
-       spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-       sa = hba[ctlr]->scsi_ctlr;
+       spin_lock_irqsave(&h->lock, flags);
+       sa = h->scsi_ctlr;
        stk = &sa->cmd_stack; 
 
        if (sa->registered) {
-               printk("cciss%d: SCSI subsystem already engaged.\n", ctlr);
-               spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+               dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
+               spin_unlock_irqrestore(&h->lock, flags);
                return -ENXIO;
        }
        sa->registered = 1;
-       spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-       cciss_update_non_disk_devices(ctlr, -1);
-       cciss_scsi_detect(ctlr);
+       spin_unlock_irqrestore(&h->lock, flags);
+       cciss_update_non_disk_devices(h, -1);
+       cciss_scsi_detect(h);
        return 0;
 }
 
 static void
-cciss_seq_tape_report(struct seq_file *seq, int ctlr)
+cciss_seq_tape_report(struct seq_file *seq, ctlr_info_t *h)
 {
        unsigned long flags;
 
-       CPQ_TAPE_LOCK(ctlr, flags);
+       CPQ_TAPE_LOCK(h, flags);
        seq_printf(seq,
                "Sequential access devices: %d\n\n",
-                       ccissscsi[ctlr].ndevices);
-       CPQ_TAPE_UNLOCK(ctlr, flags);
+                       ccissscsi[h->ctlr].ndevices);
+       CPQ_TAPE_UNLOCK(h, flags);
 }
 
 static int wait_for_device_to_become_ready(ctlr_info_t *h,
@@ -1610,10 +1574,10 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
        int waittime = HZ;
        CommandList_struct *c;
 
-       c = cmd_alloc(h, 1);
+       c = cmd_alloc(h);
        if (!c) {
-               printk(KERN_WARNING "cciss%d: out of memory in "
-                       "wait_for_device_to_become_ready.\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "out of memory in "
+                       "wait_for_device_to_become_ready.\n");
                return IO_ERROR;
        }
 
@@ -1631,7 +1595,7 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
                        waittime = waittime * 2;
 
                /* Send the Test Unit Ready */
-               rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0,
+               rc = fill_cmd(h, c, TEST_UNIT_READY, NULL, 0, 0,
                        lunaddr, TYPE_CMD);
                if (rc == 0)
                        rc = sendcmd_withirq_core(h, c, 0);
@@ -1657,18 +1621,18 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
                        }
                }
 retry_tur:
-               printk(KERN_WARNING "cciss%d: Waiting %d secs "
+               dev_warn(&h->pdev->dev, "Waiting %d secs "
                        "for device to become ready.\n",
-                       h->ctlr, waittime / HZ);
+                       waittime / HZ);
                rc = 1; /* device not ready. */
        }
 
        if (rc)
-               printk("cciss%d: giving up on device.\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "giving up on device.\n");
        else
-               printk(KERN_WARNING "cciss%d: device is ready.\n", h->ctlr);
+               dev_warn(&h->pdev->dev, "device is ready.\n");
 
-       cmd_free(h, c, 1);
+       cmd_free(h, c);
        return rc;
 }
 
@@ -1688,26 +1652,24 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
        int rc;
        CommandList_struct *cmd_in_trouble;
        unsigned char lunaddr[8];
-       ctlr_info_t *c;
-       int ctlr;
+       ctlr_info_t *h;
 
        /* find the controller to which the command to be aborted was sent */
-       c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
-       if (c == NULL) /* paranoia */
+       h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+       if (h == NULL) /* paranoia */
                return FAILED;
-       ctlr = c->ctlr;
-       printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
+       dev_warn(&h->pdev->dev, "resetting tape drive or medium changer.\n");
        /* find the command that's giving us trouble */
        cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
        if (cmd_in_trouble == NULL) /* paranoia */
                return FAILED;
        memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
        /* send a reset to the SCSI LUN which the command was sent to */
-       rc = sendcmd_withirq(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
+       rc = sendcmd_withirq(h, CCISS_RESET_MSG, NULL, 0, 0, lunaddr,
                TYPE_MSG);
-       if (rc == 0 && wait_for_device_to_become_ready(c, lunaddr) == 0)
+       if (rc == 0 && wait_for_device_to_become_ready(h, lunaddr) == 0)
                return SUCCESS;
-       printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
+       dev_warn(&h->pdev->dev, "resetting device failed.\n");
        return FAILED;
 }
 
@@ -1716,22 +1678,20 @@ static int  cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
        int rc;
        CommandList_struct *cmd_to_abort;
        unsigned char lunaddr[8];
-       ctlr_info_t *c;
-       int ctlr;
+       ctlr_info_t *h;
 
        /* find the controller to which the command to be aborted was sent */
-       c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
-       if (c == NULL) /* paranoia */
+       h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+       if (h == NULL) /* paranoia */
                return FAILED;
-       ctlr = c->ctlr;
-       printk(KERN_WARNING "cciss%d: aborting tardy SCSI cmd\n", ctlr);
+       dev_warn(&h->pdev->dev, "aborting tardy SCSI cmd\n");
 
        /* find the command to be aborted */
        cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
        if (cmd_to_abort == NULL) /* paranoia */
                return FAILED;
        memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
-       rc = sendcmd_withirq(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag,
+       rc = sendcmd_withirq(h, CCISS_ABORT_MSG, &cmd_to_abort->Header.Tag,
                0, 0, lunaddr, TYPE_MSG);
        if (rc == 0)
                return SUCCESS;
index abb4ec6..d53b029 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
@@ -157,7 +158,7 @@ static int sendcmd(
        unsigned int blkcnt,
        unsigned int log_unit );
 
-static int ida_open(struct block_device *bdev, fmode_t mode);
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode);
 static int ida_release(struct gendisk *disk, fmode_t mode);
 static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg);
 static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -195,9 +196,9 @@ static inline ctlr_info_t *get_host(struct gendisk *disk)
 
 static const struct block_device_operations ida_fops  = {
        .owner          = THIS_MODULE,
-       .open           = ida_open,
+       .open           = ida_unlocked_open,
        .release        = ida_release,
-       .locked_ioctl   = ida_ioctl,
+       .ioctl          = ida_ioctl,
        .getgeo         = ida_getgeo,
        .revalidate_disk= ida_revalidate,
 };
@@ -840,13 +841,29 @@ static int ida_open(struct block_device *bdev, fmode_t mode)
        return 0;
 }
 
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+       int ret;
+
+       lock_kernel();
+       ret = ida_open(bdev, mode);
+       unlock_kernel();
+
+       return ret;
+}
+
 /*
  * Close.  Sync first.
  */
 static int ida_release(struct gendisk *disk, fmode_t mode)
 {
-       ctlr_info_t *host = get_host(disk);
+       ctlr_info_t *host;
+
+       lock_kernel();
+       host = get_host(disk);
        host->usage_count--;
+       unlock_kernel();
+
        return 0;
 }
 
@@ -1128,7 +1145,7 @@ static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  *  ida_ioctl does some miscellaneous stuff like reporting drive geometry,
  *  setting readahead and submitting commands from userspace to the controller.
  */
-static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+static int ida_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
 {
        drv_info_t *drv = get_drv(bdev->bd_disk);
        ctlr_info_t *host = get_host(bdev->bd_disk);
@@ -1162,7 +1179,8 @@ out_passthru:
                return error;
        case IDAGETCTLRSIG:
                if (!arg) return -EINVAL;
-               put_user(host->ctlr_sig, (int __user *)arg);
+               if (put_user(host->ctlr_sig, (int __user *)arg))
+                       return -EFAULT;
                return 0;
        case IDAREVALIDATEVOLS:
                if (MINOR(bdev->bd_dev) != 0)
@@ -1170,7 +1188,8 @@ out_passthru:
                return revalidate_allvol(host);
        case IDADRIVERVERSION:
                if (!arg) return -EINVAL;
-               put_user(DRIVER_VERSION, (unsigned long __user *)arg);
+               if (put_user(DRIVER_VERSION, (unsigned long __user *)arg))
+                       return -EFAULT;
                return 0;
        case IDAGETPCIINFO:
        {
@@ -1192,6 +1211,19 @@ out_passthru:
        }
                
 }
+
+static int ida_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long param)
+{
+       int ret;
+
+       lock_kernel();
+       ret = ida_locked_ioctl(bdev, mode, cmd, param);
+       unlock_kernel();
+
+       return ret;
+}
+
 /*
  * ida_ctlr_ioctl is for passing commands to the controller from userspace.
  * The command block (io) has already been copied to kernel space for us,
@@ -1225,17 +1257,11 @@ static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io)
        /* Pre submit processing */
        switch(io->cmd) {
        case PASSTHRU_A:
-               p = kmalloc(io->sg[0].size, GFP_KERNEL);
-               if (!p) 
-               { 
-                       error = -ENOMEM; 
-                       cmd_free(h, c, 0); 
-                       return(error);
-               }
-               if (copy_from_user(p, io->sg[0].addr, io->sg[0].size)) {
-                       kfree(p);
-                       cmd_free(h, c, 0); 
-                       return -EFAULT;
+               p = memdup_user(io->sg[0].addr, io->sg[0].size);
+               if (IS_ERR(p)) {
+                       error = PTR_ERR(p);
+                       cmd_free(h, c, 0);
+                       return error;
                }
                c->req.hdr.blk = pci_map_single(h->pci_dev, &(io->c), 
                                sizeof(ida_ioctl_t), 
@@ -1266,18 +1292,12 @@ static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io)
        case DIAG_PASS_THRU:
        case COLLECT_BUFFER:
        case WRITE_FLASH_ROM:
-               p = kmalloc(io->sg[0].size, GFP_KERNEL);
-               if (!p) 
-               { 
-                        error = -ENOMEM; 
-                        cmd_free(h, c, 0);
-                        return(error);
+               p = memdup_user(io->sg[0].addr, io->sg[0].size);
+               if (IS_ERR(p)) {
+                       error = PTR_ERR(p);
+                       cmd_free(h, c, 0);
+                       return error;
                 }
-               if (copy_from_user(p, io->sg[0].addr, io->sg[0].size)) {
-                       kfree(p);
-                        cmd_free(h, c, 0);
-                       return -EFAULT;
-               }
                c->req.sg[0].size = io->sg[0].size;
                c->req.sg[0].addr = pci_map_single(h->pci_dev, p, 
                        c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); 
index df01899..9400845 100644 (file)
@@ -79,8 +79,8 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
        md_io.error = 0;
 
        if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
-               rw |= (1 << BIO_RW_BARRIER);
-       rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));
+               rw |= REQ_HARDBARRIER;
+       rw |= REQ_UNPLUG | REQ_SYNC;
 
  retry:
        bio = bio_alloc(GFP_NOIO, 1);
@@ -103,11 +103,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
        /* check for unsupported barrier op.
         * would rather check on EOPNOTSUPP, but that is not reliable.
         * don't try again for ANY return value != 0 */
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
+       if (unlikely((bio->bi_rw & REQ_HARDBARRIER) && !ok)) {
                /* Try again with no barrier */
                dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
                set_bit(MD_NO_BARRIER, &mdev->flags);
-               rw &= ~(1 << BIO_RW_BARRIER);
+               rw &= ~REQ_HARDBARRIER;
                bio_put(bio);
                goto retry;
        }
index 485ed8c..352441b 100644 (file)
@@ -550,12 +550,6 @@ struct p_delay_probe {
        u32     offset;  /* usecs the probe got sent after the reference time point */
 } __packed;
 
-struct delay_probe {
-       struct list_head list;
-       unsigned int seq_num;
-       struct timeval time;
-};
-
 /* DCBP: Drbd Compressed Bitmap Packet ... */
 static inline enum drbd_bitmap_code
 DCBP_get_code(struct p_compressed_bm *p)
@@ -942,11 +936,9 @@ struct drbd_conf {
        unsigned int ko_count;
        struct drbd_work  resync_work,
                          unplug_work,
-                         md_sync_work,
-                         delay_probe_work;
+                         md_sync_work;
        struct timer_list resync_timer;
        struct timer_list md_sync_timer;
-       struct timer_list delay_probe_timer;
 
        /* Used after attach while negotiating new disk state. */
        union drbd_state new_state_tmp;
@@ -1062,12 +1054,6 @@ struct drbd_conf {
        u64 ed_uuid; /* UUID of the exposed data */
        struct mutex state_mutex;
        char congestion_reason;  /* Why we where congested... */
-       struct list_head delay_probes; /* protected by peer_seq_lock */
-       int data_delay;   /* Delay of packets on the data-sock behind meta-sock */
-       unsigned int delay_seq; /* To generate sequence numbers of delay probes */
-       struct timeval dps_time; /* delay-probes-start-time */
-       unsigned int dp_volume_last;  /* send_cnt of last delay probe */
-       int c_sync_rate; /* current resync rate after delay_probe magic */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
index 7258c95..fa650dd 100644 (file)
@@ -2184,43 +2184,6 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
        return ok;
 }
 
-static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
-{
-       struct p_delay_probe dp;
-       int offset, ok = 0;
-       struct timeval now;
-
-       mutex_lock(&ds->mutex);
-       if (likely(ds->socket)) {
-               do_gettimeofday(&now);
-               offset = now.tv_usec - mdev->dps_time.tv_usec +
-                        (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
-               dp.seq_num  = cpu_to_be32(mdev->delay_seq);
-               dp.offset   = cpu_to_be32(offset);
-
-               ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
-                                   (struct p_header *)&dp, sizeof(dp), 0);
-       }
-       mutex_unlock(&ds->mutex);
-
-       return ok;
-}
-
-static int drbd_send_delay_probes(struct drbd_conf *mdev)
-{
-       int ok;
-
-       mdev->delay_seq++;
-       do_gettimeofday(&mdev->dps_time);
-       ok = drbd_send_delay_probe(mdev, &mdev->meta);
-       ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
-
-       mdev->dp_volume_last = mdev->send_cnt;
-       mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
-
-       return ok;
-}
-
 /* called on sndtimeo
  * returns FALSE if we should retry,
  * TRUE if we think connection is dead
@@ -2369,31 +2332,6 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
        return 1;
 }
 
-static void consider_delay_probes(struct drbd_conf *mdev)
-{
-       if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93)
-               return;
-
-       if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt)
-               drbd_send_delay_probes(mdev);
-}
-
-static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-       if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
-               drbd_send_delay_probes(mdev);
-
-       return 1;
-}
-
-static void delay_probe_timer_fn(unsigned long data)
-{
-       struct drbd_conf *mdev = (struct drbd_conf *) data;
-
-       if (list_empty(&mdev->delay_probe_work.list))
-               drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
-}
-
 /* Used to send write requests
  * R_PRIMARY -> Peer   (P_DATA)
  */
@@ -2425,15 +2363,15 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
        /* NOTE: no need to check if barriers supported here as we would
         *       not pass the test in make_request_common in that case
         */
-       if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) {
+       if (req->master_bio->bi_rw & REQ_HARDBARRIER) {
                dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n");
                /* dp_flags |= DP_HARDBARRIER; */
        }
-       if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO))
+       if (req->master_bio->bi_rw & REQ_SYNC)
                dp_flags |= DP_RW_SYNC;
        /* for now handle SYNCIO and UNPLUG
         * as if they still were one and the same flag */
-       if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG))
+       if (req->master_bio->bi_rw & REQ_UNPLUG)
                dp_flags |= DP_RW_SYNC;
        if (mdev->state.conn >= C_SYNC_SOURCE &&
            mdev->state.conn <= C_PAUSED_SYNC_T)
@@ -2457,9 +2395,6 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 
        drbd_put_data_sock(mdev);
 
-       if (ok)
-               consider_delay_probes(mdev);
-
        return ok;
 }
 
@@ -2506,9 +2441,6 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 
        drbd_put_data_sock(mdev);
 
-       if (ok)
-               consider_delay_probes(mdev);
-
        return ok;
 }
 
@@ -2604,6 +2536,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
        unsigned long flags;
        int rv = 0;
 
+       lock_kernel();
        spin_lock_irqsave(&mdev->req_lock, flags);
        /* to have a stable mdev->state.role
         * and no race with updating open_cnt */
@@ -2618,6 +2551,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
        if (!rv)
                mdev->open_cnt++;
        spin_unlock_irqrestore(&mdev->req_lock, flags);
+       unlock_kernel();
 
        return rv;
 }
@@ -2625,7 +2559,9 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
 static int drbd_release(struct gendisk *gd, fmode_t mode)
 {
        struct drbd_conf *mdev = gd->private_data;
+       lock_kernel();
        mdev->open_cnt--;
+       unlock_kernel();
        return 0;
 }
 
@@ -2660,9 +2596,20 @@ static void drbd_unplug_fn(struct request_queue *q)
 
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
-       mdev->sync_conf.after      = DRBD_AFTER_DEF;
-       mdev->sync_conf.rate       = DRBD_RATE_DEF;
-       mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF;
+       /* This way we get a compile error when sync_conf grows,
+          and we forgot to initialize it here */
+       mdev->sync_conf = (struct syncer_conf) {
+               /* .rate = */           DRBD_RATE_DEF,
+               /* .after = */          DRBD_AFTER_DEF,
+               /* .al_extents = */     DRBD_AL_EXTENTS_DEF,
+               /* .verify_alg = */     {}, 0,
+               /* .cpu_mask = */       {}, 0,
+               /* .csums_alg = */      {}, 0,
+               /* .use_rle = */        0
+       };
+
+       /* Have to use that way, because the layout differs between
+          big endian and little endian */
        mdev->state = (union drbd_state) {
                { .role = R_SECONDARY,
                  .peer = R_UNKNOWN,
@@ -2721,24 +2668,17 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        INIT_LIST_HEAD(&mdev->unplug_work.list);
        INIT_LIST_HEAD(&mdev->md_sync_work.list);
        INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
-       INIT_LIST_HEAD(&mdev->delay_probes);
-       INIT_LIST_HEAD(&mdev->delay_probe_work.list);
 
        mdev->resync_work.cb  = w_resync_inactive;
        mdev->unplug_work.cb  = w_send_write_hint;
        mdev->md_sync_work.cb = w_md_sync;
        mdev->bm_io_work.w.cb = w_bitmap_io;
-       mdev->delay_probe_work.cb = w_delay_probes;
        init_timer(&mdev->resync_timer);
        init_timer(&mdev->md_sync_timer);
-       init_timer(&mdev->delay_probe_timer);
        mdev->resync_timer.function = resync_timer_fn;
        mdev->resync_timer.data = (unsigned long) mdev;
        mdev->md_sync_timer.function = md_sync_timer_fn;
        mdev->md_sync_timer.data = (unsigned long) mdev;
-       mdev->delay_probe_timer.function = delay_probe_timer_fn;
-       mdev->delay_probe_timer.data = (unsigned long) mdev;
-
 
        init_waitqueue_head(&mdev->misc_wait);
        init_waitqueue_head(&mdev->state_wait);
index 2151f18..73131c5 100644 (file)
@@ -1557,10 +1557,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
                sc.rate       = DRBD_RATE_DEF;
                sc.after      = DRBD_AFTER_DEF;
                sc.al_extents = DRBD_AL_EXTENTS_DEF;
-               sc.dp_volume  = DRBD_DP_VOLUME_DEF;
-               sc.dp_interval = DRBD_DP_INTERVAL_DEF;
-               sc.throttle_th = DRBD_RS_THROTTLE_TH_DEF;
-               sc.hold_off_th = DRBD_RS_HOLD_OFF_TH_DEF;
        } else
                memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 
index d0f1767..be3374b 100644 (file)
@@ -73,21 +73,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
        seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
        /* if more than 1 GB display in MB */
        if (mdev->rs_total > 0x100000L)
-               seq_printf(seq, "(%lu/%lu)M",
+               seq_printf(seq, "(%lu/%lu)M\n\t",
                            (unsigned long) Bit2KB(rs_left >> 10),
                            (unsigned long) Bit2KB(mdev->rs_total >> 10));
        else
-               seq_printf(seq, "(%lu/%lu)K",
+               seq_printf(seq, "(%lu/%lu)K\n\t",
                            (unsigned long) Bit2KB(rs_left),
                            (unsigned long) Bit2KB(mdev->rs_total));
 
-       if (mdev->state.conn == C_SYNC_TARGET)
-               seq_printf(seq, " queue_delay: %d.%d ms\n\t",
-                          mdev->data_delay / 1000,
-                          (mdev->data_delay % 1000) / 100);
-       else if (mdev->state.conn == C_SYNC_SOURCE)
-               seq_printf(seq, " delay_probe: %u\n\t", mdev->delay_seq);
-
        /* see drivers/md/md.c
         * We do not want to overflow, so the order of operands and
         * the * 100 / 100 trick are important. We do a +1 to be
@@ -135,14 +128,6 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
        else
                seq_printf(seq, " (%ld)", dbdt);
 
-       if (mdev->state.conn == C_SYNC_TARGET) {
-               if (mdev->c_sync_rate > 1000)
-                       seq_printf(seq, " want: %d,%03d",
-                                  mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
-               else
-                       seq_printf(seq, " want: %d", mdev->c_sync_rate);
-       }
-
        seq_printf(seq, " K/sec\n");
 }
 
index ec1711f..081522d 100644 (file)
@@ -1180,7 +1180,7 @@ next_bio:
        bio->bi_sector = sector;
        bio->bi_bdev = mdev->ldev->backing_bdev;
        /* we special case some flags in the multi-bio case, see below
-        * (BIO_RW_UNPLUG, BIO_RW_BARRIER) */
+        * (REQ_UNPLUG, REQ_HARDBARRIER) */
        bio->bi_rw = rw;
        bio->bi_private = e;
        bio->bi_end_io = drbd_endio_sec;
@@ -1209,16 +1209,16 @@ next_bio:
                bios = bios->bi_next;
                bio->bi_next = NULL;
 
-               /* strip off BIO_RW_UNPLUG unless it is the last bio */
+               /* strip off REQ_UNPLUG unless it is the last bio */
                if (bios)
-                       bio->bi_rw &= ~(1<<BIO_RW_UNPLUG);
+                       bio->bi_rw &= ~REQ_UNPLUG;
 
                drbd_generic_make_request(mdev, fault_type, bio);
 
-               /* strip off BIO_RW_BARRIER,
+               /* strip off REQ_HARDBARRIER,
                 * unless it is the first or last bio */
                if (bios && bios->bi_next)
-                       bios->bi_rw &= ~(1<<BIO_RW_BARRIER);
+                       bios->bi_rw &= ~REQ_HARDBARRIER;
        } while (bios);
        maybe_kick_lo(mdev);
        return 0;
@@ -1233,7 +1233,7 @@ fail:
 }
 
 /**
- * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set
+ * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
  * @mdev:      DRBD device.
  * @w:         work object.
  * @cancel:    The connection will be closed anyways (unused in this callback)
@@ -1245,7 +1245,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea
           (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
           so that we can finish that epoch in drbd_may_finish_epoch().
           That is necessary if we already have a long chain of Epochs, before
-          we realize that BIO_RW_BARRIER is actually not supported */
+          we realize that REQ_HARDBARRIER is actually not supported */
 
        /* As long as the -ENOTSUPP on the barrier is reported immediately
           that will never trigger. If it is reported late, we will just
@@ -1824,14 +1824,14 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
                epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
                if (epoch == e->epoch) {
                        set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-                       rw |= (1<<BIO_RW_BARRIER);
+                       rw |= REQ_HARDBARRIER;
                        e->flags |= EE_IS_BARRIER;
                } else {
                        if (atomic_read(&epoch->epoch_size) > 1 ||
                            !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
                                set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
                                set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-                               rw |= (1<<BIO_RW_BARRIER);
+                               rw |= REQ_HARDBARRIER;
                                e->flags |= EE_IS_BARRIER;
                        }
                }
@@ -1841,10 +1841,10 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
        dp_flags = be32_to_cpu(p->dp_flags);
        if (dp_flags & DP_HARDBARRIER) {
                dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
-               /* rw |= (1<<BIO_RW_BARRIER); */
+               /* rw |= REQ_HARDBARRIER; */
        }
        if (dp_flags & DP_RW_SYNC)
-               rw |= (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+               rw |= REQ_SYNC | REQ_UNPLUG;
        if (dp_flags & DP_MAY_SET_IN_SYNC)
                e->flags |= EE_MAY_SET_IN_SYNC;
 
@@ -3555,14 +3555,15 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
        return ok;
 }
 
-static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
 {
        /* TODO zero copy sink :) */
        static char sink[128];
        int size, want, r;
 
-       dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
-            h->command, h->length);
+       if (!silent)
+               dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+                    h->command, h->length);
 
        size = h->length;
        while (size > 0) {
@@ -3574,101 +3575,25 @@ static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
        return size == 0;
 }
 
-static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
-{
-       if (mdev->state.disk >= D_INCONSISTENT)
-               drbd_kick_lo(mdev);
-
-       /* Make sure we've acked all the TCP data associated
-        * with the data requests being unplugged */
-       drbd_tcp_quickack(mdev->data.socket);
-
-       return TRUE;
-}
-
-static void timeval_sub_us(struct timeval* tv, unsigned int us)
+static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
 {
-       tv->tv_sec -= us / 1000000;
-       us = us % 1000000;
-       if (tv->tv_usec > us) {
-               tv->tv_usec += 1000000;
-               tv->tv_sec--;
-       }
-       tv->tv_usec -= us;
+       return receive_skip_(mdev, h, 0);
 }
 
-static void got_delay_probe(struct drbd_conf *mdev, int from, struct p_delay_probe *p)
+static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
 {
-       struct delay_probe *dp;
-       struct list_head *le;
-       struct timeval now;
-       int seq_num;
-       int offset;
-       int data_delay;
-
-       seq_num = be32_to_cpu(p->seq_num);
-       offset  = be32_to_cpu(p->offset);
-
-       spin_lock(&mdev->peer_seq_lock);
-       if (!list_empty(&mdev->delay_probes)) {
-               if (from == USE_DATA_SOCKET)
-                       le = mdev->delay_probes.next;
-               else
-                       le = mdev->delay_probes.prev;
-
-               dp = list_entry(le, struct delay_probe, list);
-
-               if (dp->seq_num == seq_num) {
-                       list_del(le);
-                       spin_unlock(&mdev->peer_seq_lock);
-                       do_gettimeofday(&now);
-                       timeval_sub_us(&now, offset);
-                       data_delay =
-                               now.tv_usec - dp->time.tv_usec +
-                               (now.tv_sec - dp->time.tv_sec) * 1000000;
-
-                       if (data_delay > 0)
-                               mdev->data_delay = data_delay;
-
-                       kfree(dp);
-                       return;
-               }
-
-               if (dp->seq_num > seq_num) {
-                       spin_unlock(&mdev->peer_seq_lock);
-                       dev_warn(DEV, "Previous allocation failure of struct delay_probe?\n");
-                       return; /* Do not alloca a struct delay_probe.... */
-               }
-       }
-       spin_unlock(&mdev->peer_seq_lock);
-
-       dp = kmalloc(sizeof(struct delay_probe), GFP_NOIO);
-       if (!dp) {
-               dev_warn(DEV, "Failed to allocate a struct delay_probe, do not worry.\n");
-               return;
-       }
-
-       dp->seq_num = seq_num;
-       do_gettimeofday(&dp->time);
-       timeval_sub_us(&dp->time, offset);
-
-       spin_lock(&mdev->peer_seq_lock);
-       if (from == USE_DATA_SOCKET)
-               list_add(&dp->list, &mdev->delay_probes);
-       else
-               list_add_tail(&dp->list, &mdev->delay_probes);
-       spin_unlock(&mdev->peer_seq_lock);
+       return receive_skip_(mdev, h, 1);
 }
 
-static int receive_delay_probe(struct drbd_conf *mdev, struct p_header *h)
+static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
 {
-       struct p_delay_probe *p = (struct p_delay_probe *)h;
+       if (mdev->state.disk >= D_INCONSISTENT)
+               drbd_kick_lo(mdev);
 
-       ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-       if (drbd_recv(mdev, h->payload, h->length) != h->length)
-               return FALSE;
+       /* Make sure we've acked all the TCP data associated
+        * with the data requests being unplugged */
+       drbd_tcp_quickack(mdev->data.socket);
 
-       got_delay_probe(mdev, USE_DATA_SOCKET, p);
        return TRUE;
 }
 
@@ -3695,7 +3620,7 @@ static drbd_cmd_handler_f drbd_default_handler[] = {
        [P_OV_REQUEST]      = receive_DataRequest,
        [P_OV_REPLY]        = receive_DataRequest,
        [P_CSUM_RS_REQUEST]    = receive_DataRequest,
-       [P_DELAY_PROBE]     = receive_delay_probe,
+       [P_DELAY_PROBE]     = receive_skip_silent,
        /* anything missing from this table is in
         * the asender_tbl, see get_asender_cmd */
        [P_MAX_CMD]         = NULL,
@@ -4472,11 +4397,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
        return TRUE;
 }
 
-static int got_delay_probe_m(struct drbd_conf *mdev, struct p_header *h)
+static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h)
 {
-       struct p_delay_probe *p = (struct p_delay_probe *)h;
-
-       got_delay_probe(mdev, USE_META_SOCKET, p);
+       /* IGNORE */
        return TRUE;
 }
 
@@ -4504,7 +4427,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
        [P_BARRIER_ACK]     = { sizeof(struct p_barrier_ack), got_BarrierAck },
        [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
        [P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
-       [P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_delay_probe_m },
+       [P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_something_to_ignore_m },
        [P_MAX_CMD]         = { 0, NULL },
        };
        if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
index 654f1ef..f761d98 100644 (file)
@@ -997,7 +997,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
         * because of those XXX, this is not yet enabled,
         * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
         */
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) {
+       if (unlikely(bio->bi_rw & REQ_HARDBARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags)) {
                /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
index b623cee..ca4a16c 100644 (file)
@@ -424,18 +424,6 @@ void resync_timer_fn(unsigned long data)
                drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
-static int calc_resync_rate(struct drbd_conf *mdev)
-{
-       int d = mdev->data_delay / 1000; /* us -> ms */
-       int td = mdev->sync_conf.throttle_th * 100;  /* 0.1s -> ms */
-       int hd = mdev->sync_conf.hold_off_th * 100;  /* 0.1s -> ms */
-       int cr = mdev->sync_conf.rate;
-
-       return d <= td ? cr :
-               d >= hd ? 0 :
-               cr + (cr * (td - d) / (hd - td));
-}
-
 int w_make_resync_request(struct drbd_conf *mdev,
                struct drbd_work *w, int cancel)
 {
@@ -473,8 +461,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
        max_segment_size = mdev->agreed_pro_version < 94 ?
                queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
 
-       mdev->c_sync_rate = calc_resync_rate(mdev);
-       number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+       number = SLEEP_TIME * mdev->sync_conf.rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
        pe = atomic_read(&mdev->rs_pending_cnt);
 
        mutex_lock(&mdev->data.mutex);
index 90c4038..cf04c1b 100644 (file)
@@ -178,6 +178,7 @@ static int print_unex = 1;
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/bio.h>
+#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/jiffies.h>
 #include <linux/fcntl.h>
@@ -514,8 +515,6 @@ static unsigned long fdc_busy;
 static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
 static DECLARE_WAIT_QUEUE_HEAD(command_done);
 
-#define NO_SIGNAL (!interruptible || !signal_pending(current))
-
 /* Errors during formatting are counted here. */
 static int format_errors;
 
@@ -539,7 +538,7 @@ static int max_buffer_sectors;
 
 static int *errors;
 typedef void (*done_f)(int);
-static struct cont_t {
+static const struct cont_t {
        void (*interrupt)(void);
                                /* this is called after the interrupt of the
                                 * main command */
@@ -578,7 +577,7 @@ static void reset_fdc(void);
 #define NEED_1_RECAL   -2
 #define NEED_2_RECAL   -3
 
-static int usage_count;
+static atomic_t usage_count = ATOMIC_INIT(0);
 
 /* buffer related variables */
 static int buffer_track = -1;
@@ -858,36 +857,20 @@ static void set_fdc(int drive)
 }
 
 /* locks the driver */
-static int _lock_fdc(int drive, bool interruptible, int line)
+static int lock_fdc(int drive, bool interruptible)
 {
-       if (!usage_count) {
-               pr_err("Trying to lock fdc while usage count=0 at line %d\n",
-                      line);
+       if (WARN(atomic_read(&usage_count) == 0,
+                "Trying to lock fdc while usage count=0\n"))
                return -1;
-       }
-
-       if (test_and_set_bit(0, &fdc_busy)) {
-               DECLARE_WAITQUEUE(wait, current);
-               add_wait_queue(&fdc_wait, &wait);
-
-               for (;;) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-
-                       if (!test_and_set_bit(0, &fdc_busy))
-                               break;
 
-                       schedule();
-
-                       if (!NO_SIGNAL) {
-                               remove_wait_queue(&fdc_wait, &wait);
-                               return -EINTR;
-                       }
-               }
+       /* only interruptible callers may be aborted by a signal */
+       if (interruptible) {
+               if (wait_event_interruptible(fdc_wait, !test_and_set_bit(0, &fdc_busy)))
+                       return -EINTR;
+       } else {
+               wait_event(fdc_wait, !test_and_set_bit(0, &fdc_busy));
+       }
 
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&fdc_wait, &wait);
-               flush_scheduled_work();
-       }
        command_status = FD_COMMAND_NONE;
 
        __reschedule_timeout(drive, "lock fdc");
@@ -895,11 +873,8 @@ static int _lock_fdc(int drive, bool interruptible, int line)
        return 0;
 }
 
-#define lock_fdc(drive, interruptible)                 \
-       _lock_fdc(drive, interruptible, __LINE__)
-
 /* unlocks the driver */
-static inline void unlock_fdc(void)
+static void unlock_fdc(void)
 {
        unsigned long flags;
 
@@ -1224,7 +1199,7 @@ static int need_more_output(void)
 /* Set perpendicular mode as required, based on data rate, if supported.
  * 82077 Now tested. 1Mbps data rate only possible with 82077-1.
  */
-static inline void perpendicular_mode(void)
+static void perpendicular_mode(void)
 {
        unsigned char perp_mode;
 
@@ -1995,14 +1970,14 @@ static void do_wakeup(void)
        wake_up(&command_done);
 }
 
-static struct cont_t wakeup_cont = {
+static const struct cont_t wakeup_cont = {
        .interrupt      = empty,
        .redo           = do_wakeup,
        .error          = empty,
        .done           = (done_f)empty
 };
 
-static struct cont_t intr_cont = {
+static const struct cont_t intr_cont = {
        .interrupt      = empty,
        .redo           = process_fd_request,
        .error          = empty,
@@ -2015,25 +1990,10 @@ static int wait_til_done(void (*handler)(void), bool interruptible)
 
        schedule_bh(handler);
 
-       if (command_status < 2 && NO_SIGNAL) {
-               DECLARE_WAITQUEUE(wait, current);
-
-               add_wait_queue(&command_done, &wait);
-               for (;;) {
-                       set_current_state(interruptible ?
-                                         TASK_INTERRUPTIBLE :
-                                         TASK_UNINTERRUPTIBLE);
-
-                       if (command_status >= 2 || !NO_SIGNAL)
-                               break;
-
-                       is_alive(__func__, "");
-                       schedule();
-               }
-
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&command_done, &wait);
-       }
+       if (interruptible)
+               wait_event_interruptible(command_done, command_status >= 2);
+       else
+               wait_event(command_done, command_status >= 2);
 
        if (command_status < 2) {
                cancel_activity();
@@ -2223,7 +2183,7 @@ static void redo_format(void)
        debugt(__func__, "queue format request");
 }
 
-static struct cont_t format_cont = {
+static const struct cont_t format_cont = {
        .interrupt      = format_interrupt,
        .redo           = redo_format,
        .error          = bad_flp_intr,
@@ -2583,10 +2543,8 @@ static int make_raw_rw_request(void)
        int tracksize;
        int ssize;
 
-       if (max_buffer_sectors == 0) {
-               pr_info("VFS: Block I/O scheduled on unopened device\n");
+       if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
                return 0;
-       }
 
        set_fdc((long)current_req->rq_disk->private_data);
 
@@ -2921,7 +2879,7 @@ do_request:
        return;
 }
 
-static struct cont_t rw_cont = {
+static const struct cont_t rw_cont = {
        .interrupt      = rw_interrupt,
        .redo           = redo_fd_request,
        .error          = bad_flp_intr,
@@ -2936,19 +2894,16 @@ static void process_fd_request(void)
 
 static void do_fd_request(struct request_queue *q)
 {
-       if (max_buffer_sectors == 0) {
-               pr_info("VFS: %s called on non-open device\n", __func__);
+       if (WARN(max_buffer_sectors == 0,
+                "VFS: %s called on non-open device\n", __func__))
                return;
-       }
 
-       if (usage_count == 0) {
-               pr_info("warning: usage count=0, current_req=%p exiting\n",
-                       current_req);
-               pr_info("sect=%ld type=%x flags=%x\n",
-                       (long)blk_rq_pos(current_req), current_req->cmd_type,
-                       current_req->cmd_flags);
+       if (WARN(atomic_read(&usage_count) == 0,
+                "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+                current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
+                current_req->cmd_flags))
                return;
-       }
+
        if (test_bit(0, &fdc_busy)) {
                /* fdc busy, this new request will be treated when the
                   current one is done */
@@ -2960,7 +2915,7 @@ static void do_fd_request(struct request_queue *q)
        is_alive(__func__, "");
 }
 
-static struct cont_t poll_cont = {
+static const struct cont_t poll_cont = {
        .interrupt      = success_and_wakeup,
        .redo           = floppy_ready,
        .error          = generic_failure,
@@ -2991,7 +2946,7 @@ static void reset_intr(void)
        pr_info("weird, reset interrupt called\n");
 }
 
-static struct cont_t reset_cont = {
+static const struct cont_t reset_cont = {
        .interrupt      = reset_intr,
        .redo           = success_and_wakeup,
        .error          = generic_failure,
@@ -3033,7 +2988,7 @@ static inline int fd_copyin(void __user *param, void *address,
        return copy_from_user(address, param, size) ? -EFAULT : 0;
 }
 
-static inline const char *drive_name(int type, int drive)
+static const char *drive_name(int type, int drive)
 {
        struct floppy_struct *floppy;
 
@@ -3096,14 +3051,14 @@ static void raw_cmd_done(int flag)
        generic_done(flag);
 }
 
-static struct cont_t raw_cmd_cont = {
+static const struct cont_t raw_cmd_cont = {
        .interrupt      = success_and_wakeup,
        .redo           = floppy_start,
        .error          = generic_failure,
        .done           = raw_cmd_done
 };
 
-static inline int raw_cmd_copyout(int cmd, void __user *param,
+static int raw_cmd_copyout(int cmd, void __user *param,
                                  struct floppy_raw_cmd *ptr)
 {
        int ret;
@@ -3148,7 +3103,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr)
        }
 }
 
-static inline int raw_cmd_copyin(int cmd, void __user *param,
+static int raw_cmd_copyin(int cmd, void __user *param,
                                 struct floppy_raw_cmd **rcmd)
 {
        struct floppy_raw_cmd *ptr;
@@ -3266,7 +3221,7 @@ static int invalidate_drive(struct block_device *bdev)
        return 0;
 }
 
-static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
+static int set_geometry(unsigned int cmd, struct floppy_struct *g,
                               int drive, int type, struct block_device *bdev)
 {
        int cnt;
@@ -3337,7 +3292,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
 }
 
 /* handle obsolete ioctl's */
-static int ioctl_table[] = {
+static unsigned int ioctl_table[] = {
        FDCLRPRM,
        FDSETPRM,
        FDDEFPRM,
@@ -3365,7 +3320,7 @@ static int ioctl_table[] = {
        FDTWADDLE
 };
 
-static inline int normalize_ioctl(int *cmd, int *size)
+static int normalize_ioctl(unsigned int *cmd, int *size)
 {
        int i;
 
@@ -3417,7 +3372,7 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return 0;
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
                    unsigned long param)
 {
        int drive = (long)bdev->bd_disk->private_data;
@@ -3593,6 +3548,18 @@ static int fd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
        return 0;
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long param)
+{
+       int ret;
+
+       lock_kernel();
+       ret = fd_locked_ioctl(bdev, mode, cmd, param);
+       unlock_kernel();
+
+       return ret;
+}
+
 static void __init config_types(void)
 {
        bool has_drive = false;
@@ -3649,6 +3616,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
        int drive = (long)disk->private_data;
 
+       lock_kernel();
        mutex_lock(&open_lock);
        if (UDRS->fd_ref < 0)
                UDRS->fd_ref = 0;
@@ -3659,6 +3627,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
        if (!UDRS->fd_ref)
                opened_bdev[drive] = NULL;
        mutex_unlock(&open_lock);
+       unlock_kernel();
 
        return 0;
 }
@@ -3676,6 +3645,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        int res = -EBUSY;
        char *tmp;
 
+       lock_kernel();
        mutex_lock(&open_lock);
        old_dev = UDRS->fd_device;
        if (opened_bdev[drive] && opened_bdev[drive] != bdev)
@@ -3752,6 +3722,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
                        goto out;
        }
        mutex_unlock(&open_lock);
+       unlock_kernel();
        return 0;
 out:
        if (UDRS->fd_ref < 0)
@@ -3762,6 +3733,7 @@ out:
                opened_bdev[drive] = NULL;
 out2:
        mutex_unlock(&open_lock);
+       unlock_kernel();
        return res;
 }
 
@@ -3829,6 +3801,7 @@ static int __floppy_read_block_0(struct block_device *bdev)
        bio.bi_size = size;
        bio.bi_bdev = bdev;
        bio.bi_sector = 0;
+       bio.bi_flags = (1 << BIO_QUIET);        /* BIO_* are bit numbers, bi_flags is a mask */
        init_completion(&complete);
        bio.bi_private = &complete;
        bio.bi_end_io = floppy_rb0_complete;
@@ -3857,10 +3830,10 @@ static int floppy_revalidate(struct gendisk *disk)
        if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
            test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
            test_bit(drive, &fake_change) || NO_GEOM) {
-               if (usage_count == 0) {
-                       pr_info("VFS: revalidate called on non-open device.\n");
+               if (WARN(atomic_read(&usage_count) == 0,
+                        "VFS: revalidate called on non-open device.\n"))
                        return -EFAULT;
-               }
+
                lock_fdc(drive, false);
                cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
                      test_bit(FD_VERIFY_BIT, &UDRS->flags));
@@ -3893,7 +3866,7 @@ static const struct block_device_operations floppy_fops = {
        .owner                  = THIS_MODULE,
        .open                   = floppy_open,
        .release                = floppy_release,
-       .locked_ioctl           = fd_ioctl,
+       .ioctl                  = fd_ioctl,
        .getgeo                 = fd_getgeo,
        .media_changed          = check_floppy_change,
        .revalidate_disk        = floppy_revalidate,
@@ -4126,7 +4099,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
        return sprintf(buf, "%X\n", UDP->cmos);
 }
 
-DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
 
 static void floppy_device_release(struct device *dev)
 {
@@ -4175,6 +4148,9 @@ static int __init floppy_init(void)
        int i, unit, drive;
        int err, dr;
 
+       set_debugt();
+       interruptjiffies = resultjiffies = jiffies;
+
 #if defined(CONFIG_PPC)
        if (check_legacy_ioport(FDC1))
                return -ENODEV;
@@ -4353,7 +4329,7 @@ out_unreg_platform_dev:
        platform_device_unregister(&floppy_device[drive]);
 out_flush_work:
        flush_scheduled_work();
-       if (usage_count)
+       if (atomic_read(&usage_count))
                floppy_release_irq_and_dma();
 out_unreg_region:
        blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@@ -4370,8 +4346,6 @@ out_put_disk:
        return err;
 }
 
-static DEFINE_SPINLOCK(floppy_usage_lock);
-
 static const struct io_region {
        int offset;
        int size;
@@ -4417,14 +4391,8 @@ static void floppy_release_regions(int fdc)
 
 static int floppy_grab_irq_and_dma(void)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&floppy_usage_lock, flags);
-       if (usage_count++) {
-               spin_unlock_irqrestore(&floppy_usage_lock, flags);
+       if (atomic_inc_return(&usage_count) > 1)
                return 0;
-       }
-       spin_unlock_irqrestore(&floppy_usage_lock, flags);
 
        /*
         * We might have scheduled a free_irq(), wait it to
@@ -4435,9 +4403,7 @@ static int floppy_grab_irq_and_dma(void)
        if (fd_request_irq()) {
                DPRINT("Unable to grab IRQ%d for the floppy driver\n",
                       FLOPPY_IRQ);
-               spin_lock_irqsave(&floppy_usage_lock, flags);
-               usage_count--;
-               spin_unlock_irqrestore(&floppy_usage_lock, flags);
+               atomic_dec(&usage_count);
                return -1;
        }
        if (fd_request_dma()) {
@@ -4447,9 +4413,7 @@ static int floppy_grab_irq_and_dma(void)
                        use_virtual_dma = can_use_virtual_dma = 1;
                if (!(can_use_virtual_dma & 1)) {
                        fd_free_irq();
-                       spin_lock_irqsave(&floppy_usage_lock, flags);
-                       usage_count--;
-                       spin_unlock_irqrestore(&floppy_usage_lock, flags);
+                       atomic_dec(&usage_count);
                        return -1;
                }
        }
@@ -4484,9 +4448,7 @@ cleanup:
        fd_free_dma();
        while (--fdc >= 0)
                floppy_release_regions(fdc);
-       spin_lock_irqsave(&floppy_usage_lock, flags);
-       usage_count--;
-       spin_unlock_irqrestore(&floppy_usage_lock, flags);
+       atomic_dec(&usage_count);
        return -1;
 }
 
@@ -4498,14 +4460,10 @@ static void floppy_release_irq_and_dma(void)
 #endif
        long tmpsize;
        unsigned long tmpaddr;
-       unsigned long flags;
 
-       spin_lock_irqsave(&floppy_usage_lock, flags);
-       if (--usage_count) {
-               spin_unlock_irqrestore(&floppy_usage_lock, flags);
+       if (!atomic_dec_and_test(&usage_count))
                return;
-       }
-       spin_unlock_irqrestore(&floppy_usage_lock, flags);
+
        if (irqdma_allocated) {
                fd_disable_dma();
                fd_free_dma();
@@ -4598,7 +4556,7 @@ static void __exit floppy_module_exit(void)
        del_timer_sync(&fd_timer);
        blk_cleanup_queue(floppy_queue);
 
-       if (usage_count)
+       if (atomic_read(&usage_count))
                floppy_release_irq_and_dma();
 
        /* eject disk, if any */
index 81c78b3..30ec6b3 100644 (file)
@@ -627,7 +627,7 @@ repeat:
                req_data_dir(req) == READ ? "read" : "writ",
                cyl, head, sec, nsect, req->buffer);
 #endif
-       if (blk_fs_request(req)) {
+       if (req->cmd_type == REQ_TYPE_FS) {
                switch (rq_data_dir(req)) {
                case READ:
                        hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
index 6120922..f3c636d 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/compat.h>
 #include <linux/suspend.h>
 #include <linux/freezer.h>
+#include <linux/smp_lock.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>         /* for invalidate_bdev() */
 #include <linux/completion.h>
@@ -476,7 +477,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
        pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 
        if (bio_rw(bio) == WRITE) {
-               bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
+               bool barrier = (bio->bi_rw & REQ_HARDBARRIER);
                struct file *file = lo->lo_backing_file;
 
                if (barrier) {
@@ -831,7 +832,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        lo->lo_queue->unplug_fn = loop_unplug;
 
        if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
-               blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN, NULL);
+               blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN);
 
        set_capacity(lo->lo_disk, size);
        bd_set_size(bdev, size << 9);
@@ -1408,9 +1409,11 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
 {
        struct loop_device *lo = bdev->bd_disk->private_data;
 
+       lock_kernel();
        mutex_lock(&lo->lo_ctl_mutex);
        lo->lo_refcnt++;
        mutex_unlock(&lo->lo_ctl_mutex);
+       unlock_kernel();
 
        return 0;
 }
@@ -1420,6 +1423,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
        struct loop_device *lo = disk->private_data;
        int err;
 
+       lock_kernel();
        mutex_lock(&lo->lo_ctl_mutex);
 
        if (--lo->lo_refcnt)
@@ -1444,6 +1448,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
 out:
        mutex_unlock(&lo->lo_ctl_mutex);
 out_unlocked:
+       unlock_kernel();        /* pairs with lock_kernel() taken on entry */
        return 0;
 }
 
index 28db925..b82c5ce 100644 (file)
@@ -670,7 +670,7 @@ static void mg_request_poll(struct request_queue *q)
                                break;
                }
 
-               if (unlikely(!blk_fs_request(host->req))) {
+               if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) {
                        mg_end_request_cur(host, -EIO);
                        continue;
                }
@@ -756,7 +756,7 @@ static void mg_request(struct request_queue *q)
                        continue;
                }
 
-               if (unlikely(!blk_fs_request(req))) {
+               if (unlikely(req->cmd_type != REQ_TYPE_FS)) {
                        mg_end_request_cur(host, -EIO);
                        continue;
                }
index 16c3c86..0daa422 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/ioctl.h>
+#include <linux/smp_lock.h>
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -448,7 +449,7 @@ static void nbd_clear_que(struct nbd_device *lo)
 
 static void nbd_handle_req(struct nbd_device *lo, struct request *req)
 {
-       if (!blk_fs_request(req))
+       if (req->cmd_type != REQ_TYPE_FS)
                goto error_out;
 
        nbd_cmd(req) = NBD_CMD_READ;
@@ -716,9 +717,11 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
        dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
                        lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
 
+       lock_kernel();
        mutex_lock(&lo->tx_lock);
        error = __nbd_ioctl(bdev, lo, cmd, arg);
        mutex_unlock(&lo->tx_lock);
+       unlock_kernel();
 
        return error;
 }
@@ -726,7 +729,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 static const struct block_device_operations nbd_fops =
 {
        .owner =        THIS_MODULE,
-       .locked_ioctl = nbd_ioctl,
+       .ioctl =        nbd_ioctl,
 };
 
 /*
index 6cd8b70..2284b4f 100644 (file)
@@ -310,7 +310,8 @@ static void osdblk_rq_fn(struct request_queue *q)
                        break;
 
                /* filter out block requests we don't understand */
-               if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) {
+               if (rq->cmd_type != REQ_TYPE_FS &&
+                   !(rq->cmd_flags & REQ_HARDBARRIER)) {
                        blk_end_request_all(rq, 0);
                        continue;
                }
@@ -322,7 +323,7 @@ static void osdblk_rq_fn(struct request_queue *q)
                 * driver-specific, etc.
                 */
 
-               do_flush = (rq->special == (void *) 0xdeadbeefUL);
+               do_flush = rq->cmd_flags & REQ_FLUSH;
                do_write = (rq_data_dir(rq) == WRITE);
 
                if (!do_flush) { /* osd_flush does not use a bio */
@@ -379,14 +380,6 @@ static void osdblk_rq_fn(struct request_queue *q)
        }
 }
 
-static void osdblk_prepare_flush(struct request_queue *q, struct request *rq)
-{
-       /* add driver-specific marker, to indicate that this request
-        * is a flush command
-        */
-       rq->special = (void *) 0xdeadbeefUL;
-}
-
 static void osdblk_free_disk(struct osdblk_device *osdev)
 {
        struct gendisk *disk = osdev->disk;
@@ -446,7 +439,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
        blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
 
        blk_queue_prep_rq(q, blk_queue_start_tag);
-       blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush);
+       blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
 
        disk->queue = q;
 
index 71acf4e..76f8565 100644 (file)
@@ -138,6 +138,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
 #include <linux/cdrom.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(pcd_lock);
@@ -224,13 +225,21 @@ static char *pcd_buf;             /* buffer for request in progress */
 static int pcd_block_open(struct block_device *bdev, fmode_t mode)
 {
        struct pcd_unit *cd = bdev->bd_disk->private_data;
-       return cdrom_open(&cd->info, bdev, mode);
+       int ret;
+
+       lock_kernel();
+       ret = cdrom_open(&cd->info, bdev, mode);
+       unlock_kernel();
+
+       return ret;
 }
 
 static int pcd_block_release(struct gendisk *disk, fmode_t mode)
 {
        struct pcd_unit *cd = disk->private_data;
+       lock_kernel();
        cdrom_release(&cd->info, mode);
+       unlock_kernel();
        return 0;
 }
 
@@ -238,7 +247,13 @@ static int pcd_block_ioctl(struct block_device *bdev, fmode_t mode,
                                unsigned cmd, unsigned long arg)
 {
        struct pcd_unit *cd = bdev->bd_disk->private_data;
-       return cdrom_ioctl(&cd->info, bdev, mode, cmd, arg);
+       int ret;
+
+       lock_kernel();
+       ret = cdrom_ioctl(&cd->info, bdev, mode, cmd, arg);
+       unlock_kernel();
+
+       return ret;
 }
 
 static int pcd_block_media_changed(struct gendisk *disk)
@@ -251,7 +266,7 @@ static const struct block_device_operations pcd_bdops = {
        .owner          = THIS_MODULE,
        .open           = pcd_block_open,
        .release        = pcd_block_release,
-       .locked_ioctl   = pcd_block_ioctl,
+       .ioctl          = pcd_block_ioctl,
        .media_changed  = pcd_block_media_changed,
 };
 
index c1e5cd0..985f0d4 100644 (file)
@@ -153,6 +153,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/kernel.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include <linux/workqueue.h>
 
@@ -439,7 +440,7 @@ static char *pd_buf;                /* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-       if (blk_special_request(pd_req)) {
+       if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
                phase = pd_special;
                return pd_special();
        }
@@ -735,12 +736,14 @@ static int pd_open(struct block_device *bdev, fmode_t mode)
 {
        struct pd_unit *disk = bdev->bd_disk->private_data;
 
+       lock_kernel();
        disk->access++;
 
        if (disk->removable) {
                pd_special_command(disk, pd_media_check);
                pd_special_command(disk, pd_door_lock);
        }
+       unlock_kernel();
        return 0;
 }
 
@@ -768,8 +771,10 @@ static int pd_ioctl(struct block_device *bdev, fmode_t mode,
 
        switch (cmd) {
        case CDROMEJECT:
+               lock_kernel();
                if (disk->access == 1)
                        pd_special_command(disk, pd_eject);
+               unlock_kernel();
                return 0;
        default:
                return -EINVAL;
@@ -780,8 +785,10 @@ static int pd_release(struct gendisk *p, fmode_t mode)
 {
        struct pd_unit *disk = p->private_data;
 
+       lock_kernel();
        if (!--disk->access && disk->removable)
                pd_special_command(disk, pd_door_unlock);
+       unlock_kernel();
 
        return 0;
 }
@@ -812,7 +819,7 @@ static const struct block_device_operations pd_fops = {
        .owner          = THIS_MODULE,
        .open           = pd_open,
        .release        = pd_release,
-       .locked_ioctl   = pd_ioctl,
+       .ioctl          = pd_ioctl,
        .getgeo         = pd_getgeo,
        .media_changed  = pd_check_media,
        .revalidate_disk= pd_revalidate
index c059aab..4457b49 100644 (file)
@@ -152,6 +152,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY};
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(pf_spin_lock);
@@ -266,7 +267,7 @@ static const struct block_device_operations pf_fops = {
        .owner          = THIS_MODULE,
        .open           = pf_open,
        .release        = pf_release,
-       .locked_ioctl   = pf_ioctl,
+       .ioctl          = pf_ioctl,
        .getgeo         = pf_getgeo,
        .media_changed  = pf_check_media,
 };
@@ -299,20 +300,26 @@ static void __init pf_init_units(void)
 static int pf_open(struct block_device *bdev, fmode_t mode)
 {
        struct pf_unit *pf = bdev->bd_disk->private_data;
+       int ret;
 
+       lock_kernel();
        pf_identify(pf);
 
+       ret = -ENODEV;
        if (pf->media_status == PF_NM)
-               return -ENODEV;
+               goto out;
 
+       ret = -EROFS;
        if ((pf->media_status == PF_RO) && (mode & FMODE_WRITE))
-               return -EROFS;
+               goto out;
 
+       ret = 0;
        pf->access++;
        if (pf->removable)
                pf_lock(pf, 1);
-
-       return 0;
+out:
+       unlock_kernel();
+       return ret;
 }
 
 static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -342,7 +349,10 @@ static int pf_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
 
        if (pf->access != 1)
                return -EBUSY;
+       lock_kernel();
        pf_eject(pf);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -350,14 +360,18 @@ static int pf_release(struct gendisk *disk, fmode_t mode)
 {
        struct pf_unit *pf = disk->private_data;
 
-       if (pf->access <= 0)
+       lock_kernel();
+       if (pf->access <= 0) {
+               unlock_kernel();
                return -EINVAL;
+       }
 
        pf->access--;
 
        if (!pf->access && pf->removable)
                pf_lock(pf, 0);
 
+       unlock_kernel();
        return 0;
 
 }
index 8a549db..b1cbeb5 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
 #include <linux/freezer.h>
+#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <scsi/scsi_cmnd.h>
@@ -1221,7 +1222,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
        pkt->bio->bi_flags = 1 << BIO_UPTODATE;
        pkt->bio->bi_idx = 0;
 
-       BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW));
+       BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
        BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
        BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
        BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
@@ -2382,6 +2383,7 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
 
        VPRINTK(DRIVER_NAME": entering open\n");
 
+       lock_kernel();
        mutex_lock(&ctl_mutex);
        pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
        if (!pd) {
@@ -2409,6 +2411,7 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
        }
 
        mutex_unlock(&ctl_mutex);
+       unlock_kernel();
        return 0;
 
 out_dec:
@@ -2416,6 +2419,7 @@ out_dec:
 out:
        VPRINTK(DRIVER_NAME": failed open (%d)\n", ret);
        mutex_unlock(&ctl_mutex);
+       unlock_kernel();
        return ret;
 }
 
@@ -2424,6 +2428,7 @@ static int pkt_close(struct gendisk *disk, fmode_t mode)
        struct pktcdvd_device *pd = disk->private_data;
        int ret = 0;
 
+       lock_kernel();
        mutex_lock(&ctl_mutex);
        pd->refcnt--;
        BUG_ON(pd->refcnt < 0);
@@ -2432,6 +2437,7 @@ static int pkt_close(struct gendisk *disk, fmode_t mode)
                pkt_release_dev(pd, flush);
        }
        mutex_unlock(&ctl_mutex);
+       unlock_kernel();
        return ret;
 }
 
@@ -2762,10 +2768,12 @@ out_mem:
 static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
 {
        struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+       int ret;
 
        VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd,
                MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
 
+       lock_kernel();
        switch (cmd) {
        case CDROMEJECT:
                /*
@@ -2783,14 +2791,16 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
        case CDROM_LAST_WRITTEN:
        case CDROM_SEND_PACKET:
        case SCSI_IOCTL_SEND_COMMAND:
-               return __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg);
+               ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg);
+               break;
 
        default:
                VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
-               return -ENOTTY;
+               ret = -ENOTTY;
        }
+       unlock_kernel();
 
-       return 0;
+       return ret;
 }
 
 static int pkt_media_changed(struct gendisk *disk)
@@ -2812,7 +2822,7 @@ static const struct block_device_operations pktcdvd_ops = {
        .owner =                THIS_MODULE,
        .open =                 pkt_open,
        .release =              pkt_close,
-       .locked_ioctl =         pkt_ioctl,
+       .ioctl =                pkt_ioctl,
        .media_changed =        pkt_media_changed,
 };
 
index 3b419e3..e9da874 100644 (file)
@@ -196,13 +196,12 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
        dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
        while ((req = blk_fetch_request(q))) {
-               if (blk_fs_request(req)) {
-                       if (ps3disk_submit_request_sg(dev, req))
-                               break;
-               } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-                          req->cmd[0] == REQ_LB_OP_FLUSH) {
+               if (req->cmd_flags & REQ_FLUSH) {
                        if (ps3disk_submit_flush_request(dev, req))
                                break;
+               } else if (req->cmd_type == REQ_TYPE_FS) {
+                       if (ps3disk_submit_request_sg(dev, req))
+                               break;
                } else {
                        blk_dump_rq_flags(req, DEVICE_NAME " bad request");
                        __blk_end_request_all(req, -EIO);
@@ -257,8 +256,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
                return IRQ_HANDLED;
        }
 
-       if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-           req->cmd[0] == REQ_LB_OP_FLUSH) {
+       if (req->cmd_flags & REQ_FLUSH) {
                read = 0;
                op = "flush";
        } else {
@@ -398,16 +396,6 @@ static int ps3disk_identify(struct ps3_storage_device *dev)
        return 0;
 }
 
-static void ps3disk_prepare_flush(struct request_queue *q, struct request *req)
-{
-       struct ps3_storage_device *dev = q->queuedata;
-
-       dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
-
-       req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-       req->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 static unsigned long ps3disk_mask;
 
 static DEFINE_MUTEX(ps3disk_mask_mutex);
@@ -480,8 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
        blk_queue_dma_alignment(queue, dev->blk_size-1);
        blk_queue_logical_block_size(queue, dev->blk_size);
 
-       blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
-                         ps3disk_prepare_flush);
+       blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
        blk_queue_max_segments(queue, -1);
        blk_queue_max_segment_size(queue, dev->bounce_size);
index e463657..2e46815 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/fd.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
@@ -661,11 +662,23 @@ out:
        return err;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{      /* .open entry point of floppy_fops: runs floppy_open() with the big kernel lock held. */
+       int ret;
+
+       lock_kernel();
+       ret = floppy_open(bdev, mode);
+       unlock_kernel();        /* lock is dropped before the result of floppy_open() is returned */
+
+       return ret;
+}
+
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
        struct floppy_state *fs = disk->private_data;
        struct swim __iomem *base = fs->swd->base;
 
+       lock_kernel();
        if (fs->ref_count < 0)
                fs->ref_count = 0;
        else if (fs->ref_count > 0)
@@ -673,6 +686,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
 
        if (fs->ref_count == 0)
                swim_motor(base, OFF);
+       unlock_kernel();
 
        return 0;
 }
@@ -690,7 +704,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
        case FDEJECT:
                if (fs->ref_count != 1)
                        return -EBUSY;
+               lock_kernel();
                err = floppy_eject(fs);
+               unlock_kernel();
                return err;
 
        case FDGETPRM:
@@ -751,9 +767,9 @@ static int floppy_revalidate(struct gendisk *disk)
 
 static const struct block_device_operations floppy_fops = {
        .owner           = THIS_MODULE,
-       .open            = floppy_open,
+       .open            = floppy_unlocked_open,
        .release         = floppy_release,
-       .locked_ioctl    = floppy_ioctl,
+       .ioctl           = floppy_ioctl,
        .getgeo          = floppy_getgeo,
        .media_changed   = floppy_check_change,
        .revalidate_disk = floppy_revalidate,
index ed6fb91..cc6a386 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/ioctl.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
+#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <asm/io.h>
@@ -839,7 +840,7 @@ static int fd_eject(struct floppy_state *fs)
 static struct floppy_struct floppy_type =
        { 2880,18,2,80,0,0x1B,0x00,0xCF,0x6C,NULL };    /*  7 1.44MB 3.5"   */
 
-static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_locked_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long param)
 {
        struct floppy_state *fs = bdev->bd_disk->private_data;
@@ -867,6 +868,18 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
        return -ENOTTY;
 }
 
+static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+                                unsigned int cmd, unsigned long param)
+{      /* .ioctl entry point of floppy_fops: forwards to floppy_locked_ioctl() under the big kernel lock. */
+       int ret;
+
+       lock_kernel();
+       ret = floppy_locked_ioctl(bdev, mode, cmd, param);      /* all arguments passed through unchanged */
+       unlock_kernel();
+
+       return ret;
+}
+
 static int floppy_open(struct block_device *bdev, fmode_t mode)
 {
        struct floppy_state *fs = bdev->bd_disk->private_data;
@@ -936,15 +949,28 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        return 0;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{      /* .open entry point of floppy_fops: runs floppy_open() with the big kernel lock held. */
+       int ret;
+
+       lock_kernel();
+       ret = floppy_open(bdev, mode);
+       unlock_kernel();        /* lock is dropped before the result of floppy_open() is returned */
+
+       return ret;
+}
+
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
        struct floppy_state *fs = disk->private_data;
        struct swim3 __iomem *sw = fs->swim3;
+       lock_kernel();
        if (fs->ref_count > 0 && --fs->ref_count == 0) {
                swim3_action(fs, MOTOR_OFF);
                out_8(&sw->control_bic, 0xff);
                swim3_select(fs, RELAX);
        }
+       unlock_kernel();
        return 0;
 }
 
@@ -995,9 +1021,9 @@ static int floppy_revalidate(struct gendisk *disk)
 }
 
 static const struct block_device_operations floppy_fops = {
-       .open           = floppy_open,
+       .open           = floppy_unlocked_open,
        .release        = floppy_release,
-       .locked_ioctl   = floppy_ioctl,
+       .ioctl          = floppy_ioctl,
        .media_changed  = floppy_check_change,
        .revalidate_disk= floppy_revalidate,
 };
index 0536b5b..c48e148 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <scsi/scsi.h>
 
 #define DRV_NAME "ub"
@@ -648,7 +649,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
                return 0;
        }
 
-       if (lun->changed && !blk_pc_request(rq)) {
+       if (lun->changed && rq->cmd_type != REQ_TYPE_BLOCK_PC) {
                blk_start_request(rq);
                ub_end_rq(rq, SAM_STAT_CHECK_CONDITION);
                return 0;
@@ -684,7 +685,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
        }
        urq->nsg = n_elem;
 
-       if (blk_pc_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                ub_cmd_build_packet(sc, lun, cmd, urq);
        } else {
                ub_cmd_build_block(sc, lun, cmd, urq);
@@ -781,7 +782,7 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
        rq = urq->rq;
 
        if (cmd->error == 0) {
-               if (blk_pc_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                        if (cmd->act_len >= rq->resid_len)
                                rq->resid_len = 0;
                        else
@@ -795,7 +796,7 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
                        }
                }
        } else {
-               if (blk_pc_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                        /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */
                        memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE);
                        rq->sense_len = UB_SENSE_SIZE;
@@ -1710,6 +1711,18 @@ err_open:
        return rc;
 }
 
+static int ub_bd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{      /* .open entry point of ub_bd_fops: runs ub_bd_open() with the big kernel lock held. */
+       int ret;
+
+       lock_kernel();
+       ret = ub_bd_open(bdev, mode);
+       unlock_kernel();        /* lock is dropped before the result of ub_bd_open() is returned */
+
+       return ret;
+}
+
+
 /*
  */
 static int ub_bd_release(struct gendisk *disk, fmode_t mode)
@@ -1717,7 +1730,10 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode)
        struct ub_lun *lun = disk->private_data;
        struct ub_dev *sc = lun->udev;
 
+       lock_kernel();
        ub_put(sc);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -1729,8 +1745,13 @@ static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
 {
        struct gendisk *disk = bdev->bd_disk;
        void __user *usermem = (void __user *) arg;
+       int ret;
+
+       lock_kernel();
+       ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+       unlock_kernel();
 
-       return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+       return ret;
 }
 
 /*
@@ -1792,9 +1813,9 @@ static int ub_bd_media_changed(struct gendisk *disk)
 
 static const struct block_device_operations ub_bd_fops = {
        .owner          = THIS_MODULE,
-       .open           = ub_bd_open,
+       .open           = ub_bd_unlocked_open,
        .release        = ub_bd_release,
-       .locked_ioctl   = ub_bd_ioctl,
+       .ioctl          = ub_bd_ioctl,
        .media_changed  = ub_bd_media_changed,
        .revalidate_disk = ub_bd_revalidate,
 };
index 2f9470f..8be5715 100644 (file)
@@ -478,7 +478,7 @@ static void process_page(unsigned long data)
                                le32_to_cpu(desc->local_addr)>>9,
                                le32_to_cpu(desc->transfer_size));
                        dump_dmastat(card, control);
-               } else if (test_bit(BIO_RW, &bio->bi_rw) &&
+               } else if ((bio->bi_rw & REQ_WRITE) &&
                           le32_to_cpu(desc->local_addr) >> 9 ==
                                card->init_size) {
                        card->init_size += le32_to_cpu(desc->transfer_size) >> 9;
index 788d938..f651e51 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/smp_lock.h>
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/device.h>
@@ -175,6 +176,18 @@ static int viodasd_open(struct block_device *bdev, fmode_t mode)
        return 0;
 }
 
+static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{      /* .open entry point of viodasd_fops: runs viodasd_open() with the big kernel lock held. */
+       int ret;
+
+       lock_kernel();
+       ret = viodasd_open(bdev, mode);
+       unlock_kernel();        /* lock is dropped before the result of viodasd_open() is returned */
+
+       return ret;
+}
+
+
 /*
  * External release entry point.
  */
@@ -183,6 +196,7 @@ static int viodasd_release(struct gendisk *disk, fmode_t mode)
        struct viodasd_device *d = disk->private_data;
        HvLpEvent_Rc hvrc;
 
+       lock_kernel();
        /* Send the event to OS/400.  We DON'T expect a response */
        hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
                        HvLpEvent_Type_VirtualIo,
@@ -195,6 +209,9 @@ static int viodasd_release(struct gendisk *disk, fmode_t mode)
                        0, 0, 0);
        if (hvrc != 0)
                pr_warning("HV close call failed %d\n", (int)hvrc);
+
+       unlock_kernel();
+
        return 0;
 }
 
@@ -219,7 +236,7 @@ static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  */
 static const struct block_device_operations viodasd_fops = {
        .owner = THIS_MODULE,
-       .open = viodasd_open,
+       .open = viodasd_unlocked_open,
        .release = viodasd_release,
        .getgeo = viodasd_getgeo,
 };
@@ -361,7 +378,7 @@ static void do_viodasd_request(struct request_queue *q)
                if (req == NULL)
                        return;
                /* check that request contains a valid command */
-               if (!blk_fs_request(req)) {
+               if (req->cmd_type != REQ_TYPE_FS) {
                        viodasd_end_request(req, -EIO, blk_rq_sectors(req));
                        continue;
                }
index 23b7c48..2aafafc 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
@@ -65,13 +66,18 @@ static void blk_done(struct virtqueue *vq)
                        break;
                }
 
-               if (blk_pc_request(vbr->req)) {
+               switch (vbr->req->cmd_type) {
+               case REQ_TYPE_BLOCK_PC:
                        vbr->req->resid_len = vbr->in_hdr.residual;
                        vbr->req->sense_len = vbr->in_hdr.sense_len;
                        vbr->req->errors = vbr->in_hdr.errors;
-               }
-               if (blk_special_request(vbr->req))
+                       break;
+               case REQ_TYPE_SPECIAL:
                        vbr->req->errors = (error != 0);
+                       break;
+               default:
+                       break;
+               }
 
                __blk_end_request_all(vbr->req, error);
                list_del(&vbr->list);
@@ -94,36 +100,35 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
                return false;
 
        vbr->req = req;
-       switch (req->cmd_type) {
-       case REQ_TYPE_FS:
-               vbr->out_hdr.type = 0;
-               vbr->out_hdr.sector = blk_rq_pos(vbr->req);
-               vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-               break;
-       case REQ_TYPE_BLOCK_PC:
-               vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
-               vbr->out_hdr.sector = 0;
-               vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-               break;
-       case REQ_TYPE_SPECIAL:
-               vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+
+       if (req->cmd_flags & REQ_FLUSH) {
+               vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
                vbr->out_hdr.sector = 0;
                vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-               break;
-       case REQ_TYPE_LINUX_BLOCK:
-               if (req->cmd[0] == REQ_LB_OP_FLUSH) {
-                       vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+       } else {
+               switch (req->cmd_type) {
+               case REQ_TYPE_FS:
+                       vbr->out_hdr.type = 0;
+                       vbr->out_hdr.sector = blk_rq_pos(vbr->req);
+                       vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+                       break;
+               case REQ_TYPE_BLOCK_PC:
+                       vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
                        vbr->out_hdr.sector = 0;
                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
                        break;
+               case REQ_TYPE_SPECIAL:
+                       vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+                       vbr->out_hdr.sector = 0;
+                       vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+                       break;
+               default:
+                       /* We don't put anything else in the queue. */
+                       BUG();
                }
-               /*FALLTHRU*/
-       default:
-               /* We don't put anything else in the queue. */
-               BUG();
        }
 
-       if (blk_barrier_rq(vbr->req))
+       if (vbr->req->cmd_flags & REQ_HARDBARRIER)
                vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
 
        sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
@@ -134,12 +139,12 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
         * block, and before the normal inhdr we put the sense data and the
         * inhdr with additional status information before the normal inhdr.
         */
-       if (blk_pc_request(vbr->req))
+       if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
                sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
 
        num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
 
-       if (blk_pc_request(vbr->req)) {
+       if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
                sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
                sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
                           sizeof(vbr->in_hdr));
@@ -190,12 +195,6 @@ static void do_virtblk_request(struct request_queue *q)
                virtqueue_kick(vblk->vq);
 }
 
-static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
-{
-       req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-       req->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 /* return id (s/n) string for *disk to *id_str
  */
 static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -219,7 +218,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
        return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
 }
 
-static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
                         unsigned cmd, unsigned long data)
 {
        struct gendisk *disk = bdev->bd_disk;
@@ -235,6 +234,18 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
                              (void __user *)data);
 }
 
+static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long param)
+{      /* .ioctl entry point of virtblk_fops: forwards to virtblk_locked_ioctl() under the big kernel lock. */
+       int ret;
+
+       lock_kernel();
+       ret = virtblk_locked_ioctl(bdev, mode, cmd, param);     /* all arguments passed through unchanged */
+       unlock_kernel();
+
+       return ret;
+}
+
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
@@ -261,7 +272,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 }
 
 static const struct block_device_operations virtblk_fops = {
-       .locked_ioctl = virtblk_ioctl,
+       .ioctl  = virtblk_ioctl,
        .owner  = THIS_MODULE,
        .getgeo = virtblk_getgeo,
 };
@@ -383,8 +394,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
                 * flushing a volatile write cache on the host.  Use that
                 * to implement write barrier support.
                 */
-               blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
-                                 virtblk_prepare_flush);
+               blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
        } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) {
                /*
                 * If the BARRIER feature is supported the host expects us
@@ -393,7 +403,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
                 * never re-orders outstanding I/O.  This feature is not
                 * useful for real life scenarious and deprecated.
                 */
-               blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);
+               blk_queue_ordered(q, QUEUE_ORDERED_TAG);
        } else {
                /*
                 * If the FLUSH feature is not supported we must assume that
@@ -401,7 +411,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
                 * caching. We still need to drain the queue to provider
                 * proper barrier semantics.
                 */
-               blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
+               blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
        }
 
        /* If disk is read-only in the host, the guest should obey */
index 18a80ff..d5a3cd7 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/blkpg.h>
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -133,7 +134,7 @@ static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static const struct block_device_operations xd_fops = {
        .owner  = THIS_MODULE,
-       .locked_ioctl   = xd_ioctl,
+       .ioctl  = xd_ioctl,
        .getgeo = xd_getgeo,
 };
 static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
@@ -322,7 +323,7 @@ static void do_xd_request (struct request_queue * q)
                int res = -EIO;
                int retry;
 
-               if (!blk_fs_request(req))
+               if (req->cmd_type != REQ_TYPE_FS)
                        goto done;
                if (block + count > get_capacity(req->rq_disk))
                        goto done;
@@ -347,7 +348,7 @@ static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 /* xd_ioctl: handle device ioctl's */
-static int xd_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
+static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
 {
        switch (cmd) {
                case HDIO_SET_DMA:
@@ -375,6 +376,18 @@ static int xd_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long a
        }
 }
 
+static int xd_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long param)
+{      /* .ioctl entry point of xd_fops: forwards to xd_locked_ioctl() under the big kernel lock. */
+       int ret;
+
+       lock_kernel();
+       ret = xd_locked_ioctl(bdev, mode, cmd, param);  /* all arguments passed through unchanged */
+       unlock_kernel();
+
+       return ret;
+}
+
 /* xd_readwrite: handle a read/write request */
 static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count)
 {
index f63ac3d..ac1b682 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/cdrom.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 
 #include <xen/xen.h>
@@ -79,6 +80,7 @@ static const struct block_device_operations xlvbd_block_fops;
  */
 struct blkfront_info
 {
+       struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
        int vdevice;
@@ -95,16 +97,14 @@ struct blkfront_info
        unsigned long shadow_free;
        int feature_barrier;
        int is_ready;
-
-       /**
-        * The number of people holding this device open.  We won't allow a
-        * hot-unplug unless this is 0.
-        */
-       int users;
 };
 
 static DEFINE_SPINLOCK(blkif_io_lock);
 
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF      0
@@ -139,6 +139,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
        info->shadow_free = id;
 }
 
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{      /* Mark minors [minor, minor + nr) as used in the minors bitmap; returns 0, -ENOMEM or -EBUSY. */
+       unsigned int end = minor + nr;  /* one past the last requested minor */
+       int rc;
+
+       if (end > nr_minors) {  /* bitmap does not cover the requested range yet: grow it */
+               unsigned long *bitmap, *old;
+
+               bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+                                GFP_KERNEL);   /* allocated before minor_lock is taken */
+               if (bitmap == NULL)
+                       return -ENOMEM;
+
+               spin_lock(&minor_lock);
+               if (end > nr_minors) {  /* re-checked now that minor_lock is held */
+                       old = minors;
+                       memcpy(bitmap, minors,
+                              BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+                       minors = bitmap;
+                       nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; /* rounded up to whole longs */
+               } else
+                       old = bitmap;   /* current bitmap already covers end: discard the new one */
+               spin_unlock(&minor_lock);
+               kfree(old);     /* frees whichever bitmap is not installed in minors */
+       }
+
+       spin_lock(&minor_lock);
+       if (find_next_bit(minors, end, minor) >= end) { /* taken when no bit is set in [minor, end) */
+               for (; minor < end; ++minor)
+                       __set_bit(minor, minors);
+               rc = 0;
+       } else
+               rc = -EBUSY;    /* at least one requested minor is already marked */
+       spin_unlock(&minor_lock);
+
+       return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{      /* Clear the bits for minors [minor, minor + nr) in the minors bitmap. */
+       unsigned int end = minor + nr;  /* one past the last minor to clear */
+
+       BUG_ON(end > nr_minors);        /* the range must lie inside the current bitmap */
+       spin_lock(&minor_lock);
+       for (; minor < end; ++minor)
+               __clear_bit(minor, minors);
+       spin_unlock(&minor_lock);
+}
+
 static void blkif_restart_queue_callback(void *arg)
 {
        struct blkfront_info *info = (struct blkfront_info *)arg;
@@ -239,7 +288,7 @@ static int blkif_queue_request(struct request *req)
 
        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
-       if (blk_barrier_rq(req))
+       if (req->cmd_flags & REQ_HARDBARRIER)
                ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 
        ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -310,7 +359,7 @@ static void do_blkif_request(struct request_queue *rq)
 
                blk_start_request(req);
 
-               if (!blk_fs_request(req)) {
+               if (req->cmd_type != REQ_TYPE_FS) {
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
@@ -372,17 +421,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static int xlvbd_barrier(struct blkfront_info *info)
 {
        int err;
+       const char *barrier;
 
-       err = blk_queue_ordered(info->rq,
-                               info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
-                               NULL);
+       switch (info->feature_barrier) {
+       case QUEUE_ORDERED_DRAIN:       barrier = "enabled (drain)"; break;
+       case QUEUE_ORDERED_TAG:         barrier = "enabled (tag)"; break;
+       case QUEUE_ORDERED_NONE:        barrier = "disabled"; break;
+       default:                        return -EINVAL;
+       }
+
+       err = blk_queue_ordered(info->rq, info->feature_barrier);
 
        if (err)
                return err;
 
        printk(KERN_INFO "blkfront: %s: barriers %s\n",
-              info->gd->disk_name,
-              info->feature_barrier ? "enabled" : "disabled");
+              info->gd->disk_name, barrier);
        return 0;
 }
 
@@ -418,9 +472,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
        if ((minor % nr_parts) == 0)
                nr_minors = nr_parts;
 
+       err = xlbd_reserve_minors(minor, nr_minors);
+       if (err)
+               goto out;
+       err = -ENODEV;
+
        gd = alloc_disk(nr_minors);
        if (gd == NULL)
-               goto out;
+               goto release;
 
        offset = minor / nr_parts;
 
@@ -451,14 +510,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
        if (xlvbd_init_blk_queue(gd, sector_size)) {
                del_gendisk(gd);
-               goto out;
+               goto release;
        }
 
        info->rq = gd->queue;
        info->gd = gd;
 
-       if (info->feature_barrier)
-               xlvbd_barrier(info);
+       xlvbd_barrier(info);
 
        if (vdisk_info & VDISK_READONLY)
                set_disk_ro(gd, 1);
@@ -471,10 +529,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
        return 0;
 
+ release:
+       xlbd_release_minors(minor, nr_minors);
  out:
        return err;
 }
 
+static void xlvbd_release_gendisk(struct blkfront_info *info)
+{      /* Tear down the gendisk and request queue held by info; does nothing when info->rq is NULL. */
+       unsigned int minor, nr_minors;  /* this local nr_minors shadows the file-scope variable of the same name */
+       unsigned long flags;
+
+       if (info->rq == NULL)
+               return;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+
+       /* No more blkif_request(). */
+       blk_stop_queue(info->rq);
+
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
+       flush_scheduled_work();
+
+       del_gendisk(info->gd);
+
+       minor = info->gd->first_minor;  /* info->gd is still referenced here; put_disk() comes later */
+       nr_minors = info->gd->minors;
+       xlbd_release_minors(minor, nr_minors);  /* clear this disk's range in the minors bitmap */
+
+       blk_cleanup_queue(info->rq);
+       info->rq = NULL;        /* a later call returns early at the NULL check above */
+
+       put_disk(info->gd);
+       info->gd = NULL;
+}
+
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
        if (!RING_FULL(&info->ring)) {
@@ -569,7 +662,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
                                       info->gd->disk_name);
                                error = -EOPNOTSUPP;
-                               info->feature_barrier = 0;
+                               info->feature_barrier = QUEUE_ORDERED_NONE;
                                xlvbd_barrier(info);
                        }
                        /* fall through */
@@ -652,7 +745,7 @@ fail:
 
 
 /* Common code used when first setting up, and when resuming. */
-static int talk_to_backend(struct xenbus_device *dev,
+static int talk_to_blkback(struct xenbus_device *dev,
                           struct blkfront_info *info)
 {
        const char *message = NULL;
@@ -712,7 +805,6 @@ again:
        return err;
 }
 
-
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -773,6 +865,7 @@ static int blkfront_probe(struct xenbus_device *dev,
                return -ENOMEM;
        }
 
+       mutex_init(&info->mutex);
        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
@@ -786,7 +879,7 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);
 
-       err = talk_to_backend(dev, info);
+       err = talk_to_blkback(dev, info);
        if (err) {
                kfree(info);
                dev_set_drvdata(&dev->dev, NULL);
@@ -881,13 +974,50 @@ static int blkfront_resume(struct xenbus_device *dev)
 
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
-       err = talk_to_backend(dev, info);
+       err = talk_to_blkback(dev, info);
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
                err = blkif_recover(info);
 
        return err;
 }
 
+static void
+blkfront_closing(struct blkfront_info *info)
+{      /* Close the frontend of info, unless its block device still has openers. */
+       struct xenbus_device *xbdev = info->xbdev;
+       struct block_device *bdev = NULL;
+
+       mutex_lock(&info->mutex);
+
+       if (xbdev->state == XenbusStateClosing) {       /* already in Closing: nothing more to do */
+               mutex_unlock(&info->mutex);
+               return;
+       }
+
+       if (info->gd)
+               bdev = bdget_disk(info->gd, 0); /* released by bdput() at the end of this function */
+
+       mutex_unlock(&info->mutex);
+
+       if (!bdev) {    /* no gendisk, or bdget_disk() returned NULL */
+               xenbus_frontend_closed(xbdev);
+               return;
+       }
+
+       mutex_lock(&bdev->bd_mutex);    /* bd_openers is read with bd_mutex held */
+
+       if (bdev->bd_openers) { /* still open: report -EBUSY and only switch to Closing */
+               xenbus_dev_error(xbdev, -EBUSY,
+                                "Device in use; refusing to close");
+               xenbus_switch_state(xbdev, XenbusStateClosing);
+       } else {        /* no openers: release the disk, then signal closed */
+               xlvbd_release_gendisk(info);
+               xenbus_frontend_closed(xbdev);
+       }
+
+       mutex_unlock(&bdev->bd_mutex);
+       bdput(bdev);
+}
 
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
@@ -899,11 +1029,31 @@ static void blkfront_connect(struct blkfront_info *info)
        unsigned long sector_size;
        unsigned int binfo;
        int err;
-
-       if ((info->connected == BLKIF_STATE_CONNECTED) ||
-           (info->connected == BLKIF_STATE_SUSPENDED) )
+       int barrier;
+
+       switch (info->connected) {
+       case BLKIF_STATE_CONNECTED:
+               /*
+                * Potentially, the back-end may be signalling
+                * a capacity change; update the capacity.
+                */
+               err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                                  "sectors", "%Lu", &sectors);
+               if (XENBUS_EXIST_ERR(err))
+                       return;
+               printk(KERN_INFO "Setting capacity to %Lu\n",
+                      sectors);
+               set_capacity(info->gd, sectors);
+               revalidate_disk(info->gd);
+
+               /* fall through */
+       case BLKIF_STATE_SUSPENDED:
                return;
 
+       default:
+               break;
+       }
+
        dev_dbg(&info->xbdev->dev, "%s:%s.\n",
                __func__, info->xbdev->otherend);
 
@@ -920,10 +1070,26 @@ static void blkfront_connect(struct blkfront_info *info)
        }
 
        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                           "feature-barrier", "%lu", &info->feature_barrier,
+                           "feature-barrier", "%d", &barrier,
                            NULL);
+
+       /*
+        * If there's no "feature-barrier" defined, then it means
+        * we're dealing with a very old backend which writes
+        * synchronously; draining will do what needs to get done.
+        *
+        * If there are barriers, then we can do full queued writes
+        * with tagged barriers.
+        *
+        * If barriers are not supported, then there's not much we can
+        * do, so just set ordering to NONE.
+        */
        if (err)
-               info->feature_barrier = 0;
+               info->feature_barrier = QUEUE_ORDERED_DRAIN;
+       else if (barrier)
+               info->feature_barrier = QUEUE_ORDERED_TAG;
+       else
+               info->feature_barrier = QUEUE_ORDERED_NONE;
 
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
        if (err) {
@@ -945,53 +1111,15 @@ static void blkfront_connect(struct blkfront_info *info)
        info->is_ready = 1;
 }
 
-/**
- * Handle the change of state of the backend to Closing.  We must delete our
- * device-layer structures now, to ensure that writes are flushed through to
- * the backend.  Once is this done, we can switch to Closed in
- * acknowledgement.
- */
-static void blkfront_closing(struct xenbus_device *dev)
-{
-       struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-       unsigned long flags;
-
-       dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
-
-       if (info->rq == NULL)
-               goto out;
-
-       spin_lock_irqsave(&blkif_io_lock, flags);
-
-       /* No more blkif_request(). */
-       blk_stop_queue(info->rq);
-
-       /* No more gnttab callback work. */
-       gnttab_cancel_free_callback(&info->callback);
-       spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-       /* Flush gnttab callback work. Must be done with no locks held. */
-       flush_scheduled_work();
-
-       blk_cleanup_queue(info->rq);
-       info->rq = NULL;
-
-       del_gendisk(info->gd);
-
- out:
-       xenbus_frontend_closed(dev);
-}
-
 /**
  * Callback received when the backend's state changes.
  */
-static void backend_changed(struct xenbus_device *dev,
+static void blkback_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-       struct block_device *bd;
 
-       dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
+       dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -1006,35 +1134,56 @@ static void backend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateClosing:
-               if (info->gd == NULL) {
-                       xenbus_frontend_closed(dev);
-                       break;
-               }
-               bd = bdget_disk(info->gd, 0);
-               if (bd == NULL)
-                       xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
-
-               mutex_lock(&bd->bd_mutex);
-               if (info->users > 0)
-                       xenbus_dev_error(dev, -EBUSY,
-                                        "Device in use; refusing to close");
-               else
-                       blkfront_closing(dev);
-               mutex_unlock(&bd->bd_mutex);
-               bdput(bd);
+               blkfront_closing(info);
                break;
        }
 }
 
-static int blkfront_remove(struct xenbus_device *dev)
+static int blkfront_remove(struct xenbus_device *xbdev)
 {
-       struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+       struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
+       struct block_device *bdev = NULL;
+       struct gendisk *disk;
 
-       dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
+       dev_dbg(&xbdev->dev, "%s removed\n", xbdev->nodename);
 
        blkif_free(info, 0);
 
-       kfree(info);
+       mutex_lock(&info->mutex);
+
+       disk = info->gd;
+       if (disk)
+               bdev = bdget_disk(disk, 0);
+
+       info->xbdev = NULL;
+       mutex_unlock(&info->mutex);
+
+       if (!bdev) {
+               kfree(info);
+               return 0;
+       }
+
+       /*
+        * The xbdev was removed before we reached the Closed
+        * state. See if it's safe to remove the disk. If the bdev
+        * isn't closed yet, we let release take care of it.
+        */
+
+       mutex_lock(&bdev->bd_mutex);
+       info = disk->private_data;
+
+       dev_warn(disk_to_dev(disk),
+                "%s was hot-unplugged, %d stale handles\n",
+                xbdev->nodename, bdev->bd_openers);
+
+       if (info && !bdev->bd_openers) {
+               xlvbd_release_gendisk(info);
+               disk->private_data = NULL;
+               kfree(info);
+       }
+
+       mutex_unlock(&bdev->bd_mutex);
+       bdput(bdev);
 
        return 0;
 }
@@ -1043,30 +1192,78 @@ static int blkfront_is_ready(struct xenbus_device *dev)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
-       return info->is_ready;
+       return info->is_ready && info->xbdev;
 }
 
 static int blkif_open(struct block_device *bdev, fmode_t mode)
 {
-       struct blkfront_info *info = bdev->bd_disk->private_data;
-       info->users++;
-       return 0;
+       struct gendisk *disk = bdev->bd_disk;
+       struct blkfront_info *info;
+       int err = 0;
+
+       lock_kernel();
+
+       info = disk->private_data;
+       if (!info) {
+               /* xbdev gone */
+               err = -ERESTARTSYS;
+               goto out;
+       }
+
+       mutex_lock(&info->mutex);
+
+       if (!info->gd)
+               /* xbdev is closed */
+               err = -ERESTARTSYS;
+
+       mutex_unlock(&info->mutex);
+
+out:
+       unlock_kernel();
+       return err;
 }
 
 static int blkif_release(struct gendisk *disk, fmode_t mode)
 {
        struct blkfront_info *info = disk->private_data;
-       info->users--;
-       if (info->users == 0) {
-               /* Check whether we have been instructed to close.  We will
-                  have ignored this request initially, as the device was
-                  still mounted. */
-               struct xenbus_device *dev = info->xbdev;
-               enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
-
-               if (state == XenbusStateClosing && info->is_ready)
-                       blkfront_closing(dev);
+       struct block_device *bdev;
+       struct xenbus_device *xbdev;
+
+       lock_kernel();
+
+       bdev = bdget_disk(disk, 0);
+       bdput(bdev);
+
+       if (bdev->bd_openers)
+               goto out;
+
+       /*
+        * Check if we have been instructed to close. We will have
+        * deferred this request, because the bdev was still open.
+        */
+
+       mutex_lock(&info->mutex);
+       xbdev = info->xbdev;
+
+       if (xbdev && xbdev->state == XenbusStateClosing) {
+               /* pending switch to state closed */
+               dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+               xlvbd_release_gendisk(info);
+               xenbus_frontend_closed(info->xbdev);
+       }
+
+       mutex_unlock(&info->mutex);
+
+       if (!xbdev) {
+               /* sudden device removal */
+               dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+               xlvbd_release_gendisk(info);
+               disk->private_data = NULL;
+               kfree(info);
        }
+
+out:
+       unlock_kernel();
        return 0;
 }
 
@@ -1076,7 +1273,7 @@ static const struct block_device_operations xlvbd_block_fops =
        .open = blkif_open,
        .release = blkif_release,
        .getgeo = blkif_getgeo,
-       .locked_ioctl = blkif_ioctl,
+       .ioctl = blkif_ioctl,
 };
 
 
@@ -1092,7 +1289,7 @@ static struct xenbus_driver blkfront = {
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
-       .otherend_changed = backend_changed,
+       .otherend_changed = blkback_changed,
        .is_ready = blkfront_is_ready,
 };
 
index a7b83c0..b71888b 100644 (file)
@@ -89,6 +89,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/platform_device.h>
@@ -465,7 +466,7 @@ struct request *ace_get_next_request(struct request_queue * q)
        struct request *req;
 
        while ((req = blk_peek_request(q)) != NULL) {
-               if (blk_fs_request(req))
+               if (req->cmd_type == REQ_TYPE_FS)
                        break;
                blk_start_request(req);
                __blk_end_request_all(req, -EIO);
@@ -901,11 +902,14 @@ static int ace_open(struct block_device *bdev, fmode_t mode)
 
        dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1);
 
+       lock_kernel();
        spin_lock_irqsave(&ace->lock, flags);
        ace->users++;
        spin_unlock_irqrestore(&ace->lock, flags);
 
        check_disk_change(bdev);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -917,6 +921,7 @@ static int ace_release(struct gendisk *disk, fmode_t mode)
 
        dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1);
 
+       lock_kernel();
        spin_lock_irqsave(&ace->lock, flags);
        ace->users--;
        if (ace->users == 0) {
@@ -924,6 +929,7 @@ static int ace_release(struct gendisk *disk, fmode_t mode)
                ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ);
        }
        spin_unlock_irqrestore(&ace->lock, flags);
+       unlock_kernel();
        return 0;
 }
 
index 9114654..d75b2bb 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/bitops.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 
 #include <asm/setup.h>
@@ -153,6 +154,7 @@ static int z2_open(struct block_device *bdev, fmode_t mode)
 
     device = MINOR(bdev->bd_dev);
 
+    lock_kernel();
     if ( current_device != -1 && current_device != device )
     {
        rc = -EBUSY;
@@ -294,20 +296,25 @@ static int z2_open(struct block_device *bdev, fmode_t mode)
        set_capacity(z2ram_gendisk, z2ram_size >> 9);
     }
 
+    unlock_kernel();
     return 0;
 
 err_out_kfree:
     kfree(z2ram_map);
 err_out:
+    unlock_kernel();
     return rc;
 }
 
 static int
 z2_release(struct gendisk *disk, fmode_t mode)
 {
-    if ( current_device == -1 )
-       return 0;     
-
+    lock_kernel();
+    if ( current_device == -1 ) {
+       unlock_kernel();
+       return 0;
+    }
+    unlock_kernel();
     /*
      * FIXME: unmap memory
      */
index e3749d0..af13c62 100644 (file)
 
 -------------------------------------------------------------------------*/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #define REVISION "Revision: 3.20"
 #define VERSION "Id: cdrom.c 3.20 2003/12/17"
 
@@ -314,11 +316,17 @@ static const char *mrw_format_status[] = {
 static const char *mrw_address_space[] = { "DMA", "GAA" };
 
 #if (ERRLOGMASK!=CD_NOTHING)
-#define cdinfo(type, fmt, args...) \
-        if ((ERRLOGMASK & type) || debug==1 ) \
-            printk(KERN_INFO "cdrom: " fmt, ## args)
+#define cdinfo(type, fmt, args...)                     \
+do {                                                   \
+       if ((ERRLOGMASK & type) || debug == 1)          \
+               pr_info(fmt, ##args);                   \
+} while (0)
 #else
-#define cdinfo(type, fmt, args...) 
+#define cdinfo(type, fmt, args...)                     \
+do {                                                   \
+       if (0 && (ERRLOGMASK & type) || debug == 1)     \
+               pr_info(fmt, ##args);                   \
+} while (0)
 #endif
 
 /* These are used to simplify getting data in from and back to user land */
@@ -395,7 +403,7 @@ int register_cdrom(struct cdrom_device_info *cdi)
        if (cdo->open == NULL || cdo->release == NULL)
                return -EINVAL;
        if (!banner_printed) {
-               printk(KERN_INFO "Uniform CD-ROM driver " REVISION "\n");
+               pr_info("Uniform CD-ROM driver " REVISION "\n");
                banner_printed = 1;
                cdrom_sysctl_register();
        }
@@ -546,7 +554,7 @@ static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
        unsigned char buffer[12];
        int ret;
 
-       printk(KERN_INFO "cdrom: %sstarting format\n", cont ? "Re" : "");
+       pr_info("%sstarting format\n", cont ? "Re" : "");
 
        /*
         * FmtData bit set (bit 4), format type is 1
@@ -576,7 +584,7 @@ static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
 
        ret = cdi->ops->generic_packet(cdi, &cgc);
        if (ret)
-               printk(KERN_INFO "cdrom: bgformat failed\n");
+               pr_info("bgformat failed\n");
 
        return ret;
 }
@@ -622,8 +630,7 @@ static int cdrom_mrw_exit(struct cdrom_device_info *cdi)
 
        ret = 0;
        if (di.mrw_status == CDM_MRW_BGFORMAT_ACTIVE) {
-               printk(KERN_INFO "cdrom: issuing MRW back ground "
-                               "format suspend\n");
+               pr_info("issuing MRW background format suspend\n");
                ret = cdrom_mrw_bgformat_susp(cdi, 0);
        }
 
@@ -658,7 +665,8 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
        if ((ret = cdrom_mode_select(cdi, &cgc)))
                return ret;
 
-       printk(KERN_INFO "cdrom: %s: mrw address space %s selected\n", cdi->name, mrw_address_space[space]);
+       pr_info("%s: mrw address space %s selected\n",
+               cdi->name, mrw_address_space[space]);
        return 0;
 }
 
@@ -762,7 +770,7 @@ static int cdrom_mrw_open_write(struct cdrom_device_info *cdi)
         * always reset to DMA lba space on open
         */
        if (cdrom_mrw_set_lba_space(cdi, MRW_LBA_DMA)) {
-               printk(KERN_ERR "cdrom: failed setting lba address space\n");
+               pr_err("failed setting lba address space\n");
                return 1;
        }
 
@@ -781,8 +789,7 @@ static int cdrom_mrw_open_write(struct cdrom_device_info *cdi)
         * 3    -       MRW formatting complete
         */
        ret = 0;
-       printk(KERN_INFO "cdrom open: mrw_status '%s'\n",
-                       mrw_format_status[di.mrw_status]);
+       pr_info("open: mrw_status '%s'\n", mrw_format_status[di.mrw_status]);
        if (!di.mrw_status)
                ret = 1;
        else if (di.mrw_status == CDM_MRW_BGFORMAT_INACTIVE &&
@@ -932,8 +939,7 @@ static void cdrom_dvd_rw_close_write(struct cdrom_device_info *cdi)
                return;
        }
 
-       printk(KERN_INFO "cdrom: %s: dirty DVD+RW media, \"finalizing\"\n",
-              cdi->name);
+       pr_info("%s: dirty DVD+RW media, \"finalizing\"\n", cdi->name);
 
        init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
        cgc.cmd[0] = GPCMD_FLUSH_CACHE;
@@ -2176,7 +2182,7 @@ retry:
         * frame dma, so drop to single frame dma if we need to
         */
        if (cdi->cdda_method == CDDA_BPC_FULL && nframes > 1) {
-               printk("cdrom: dropping to single frame dma\n");
+               pr_info("dropping to single frame dma\n");
                cdi->cdda_method = CDDA_BPC_SINGLE;
                goto retry;
        }
@@ -2189,7 +2195,7 @@ retry:
        if (cdi->last_sense != 0x04 && cdi->last_sense != 0x0b)
                return ret;
 
-       printk("cdrom: dropping to old style cdda (sense=%x)\n", cdi->last_sense);
+       pr_info("dropping to old style cdda (sense=%x)\n", cdi->last_sense);
        cdi->cdda_method = CDDA_OLD;
        return cdrom_read_cdda_old(cdi, ubuf, lba, nframes);    
 }
@@ -3401,7 +3407,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
                                        "\t%d", CDROM_CAN(val) != 0);
                        break;
                default:
-                       printk(KERN_INFO "cdrom: invalid option%d\n", option);
+                       pr_info("invalid option%d\n", option);
                        return 1;
                }
                if (!ret)
@@ -3491,7 +3497,7 @@ doit:
        mutex_unlock(&cdrom_mutex);
        return proc_dostring(ctl, write, buffer, lenp, ppos);
 done:
-       printk(KERN_INFO "cdrom: info buffer too small\n");
+       pr_info("info buffer too small\n");
        goto doit;
 }
 
@@ -3665,7 +3671,7 @@ static int __init cdrom_init(void)
 
 static void __exit cdrom_exit(void)
 {
-       printk(KERN_INFO "Uniform CD-ROM driver unloaded\n");
+       pr_info("Uniform CD-ROM driver unloaded\n");
        cdrom_sysctl_unregister();
 }
 
index 03c71f7..261107d 100644 (file)
@@ -19,6 +19,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -32,6 +34,7 @@
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
+#include <linux/smp_lock.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
@@ -339,8 +342,7 @@ static int gdrom_get_last_session(struct cdrom_device_info *cd_info,
                tocuse = 0;
                err = gdrom_readtoc_cmd(gd.toc, 0);
                if (err) {
-                       printk(KERN_INFO "GDROM: Could not get CD "
-                               "table of contents\n");
+                       pr_info("Could not get CD table of contents\n");
                        return -ENXIO;
                }
        }
@@ -357,8 +359,7 @@ static int gdrom_get_last_session(struct cdrom_device_info *cd_info,
        } while (track >= fentry);
 
        if ((track > 100) || (track < get_entry_track(gd.toc->first))) {
-               printk(KERN_INFO "GDROM: No data on the last "
-                       "session of the CD\n");
+               pr_info("No data on the last session of the CD\n");
                gdrom_getsense(NULL);
                return -ENXIO;
        }
@@ -451,14 +452,14 @@ static int gdrom_getsense(short *bufstring)
                goto cleanup_sense;
        insw(GDROM_DATA_REG, &sense, sense_command->buflen/2);
        if (sense[1] & 40) {
-               printk(KERN_INFO "GDROM: Drive not ready - command aborted\n");
+               pr_info("Drive not ready - command aborted\n");
                goto cleanup_sense;
        }
        sense_key = sense[1] & 0x0F;
        if (sense_key < ARRAY_SIZE(sense_texts))
-               printk(KERN_INFO "GDROM: %s\n", sense_texts[sense_key].text);
+               pr_info("%s\n", sense_texts[sense_key].text);
        else
-               printk(KERN_ERR "GDROM: Unknown sense key: %d\n", sense_key);
+               pr_err("Unknown sense key: %d\n", sense_key);
        if (bufstring) /* return addional sense data */
                memcpy(bufstring, &sense[4], 2);
        if (sense_key < 2)
@@ -492,12 +493,18 @@ static struct cdrom_device_ops gdrom_ops = {
 
 static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
 {
-       return cdrom_open(gd.cd_info, bdev, mode);
+       int ret;
+       lock_kernel();
+       ret = cdrom_open(gd.cd_info, bdev, mode);
+       unlock_kernel();
+       return ret;
 }
 
 static int gdrom_bdops_release(struct gendisk *disk, fmode_t mode)
 {
+       lock_kernel();
        cdrom_release(gd.cd_info, mode);
+       unlock_kernel();
        return 0;
 }
 
@@ -509,7 +516,13 @@ static int gdrom_bdops_mediachanged(struct gendisk *disk)
 static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
        unsigned cmd, unsigned long arg)
 {
-       return cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
+       int ret;
+
+       lock_kernel();
+       ret = cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
+       unlock_kernel();
+
+       return ret;
 }
 
 static const struct block_device_operations gdrom_bdops = {
@@ -517,7 +530,7 @@ static const struct block_device_operations gdrom_bdops = {
        .open                   = gdrom_bdops_open,
        .release                = gdrom_bdops_release,
        .media_changed          = gdrom_bdops_mediachanged,
-       .locked_ioctl           = gdrom_bdops_ioctl,
+       .ioctl                  = gdrom_bdops_ioctl,
 };
 
 static irqreturn_t gdrom_command_interrupt(int irq, void *dev_id)
@@ -643,14 +656,13 @@ static void gdrom_request(struct request_queue *rq)
        struct request *req;
 
        while ((req = blk_fetch_request(rq)) != NULL) {
-               if (!blk_fs_request(req)) {
-                       printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
+               if (req->cmd_type != REQ_TYPE_FS) {
+                       printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
                if (rq_data_dir(req) != READ) {
-                       printk(KERN_NOTICE "GDROM: Read only device -");
-                       printk(" write request ignored\n");
+                       pr_notice("Read only device - write request ignored\n");
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
@@ -685,7 +697,7 @@ static int __devinit gdrom_outputversion(void)
        firmw_ver = kstrndup(id->firmver, 16, GFP_KERNEL);
        if (!firmw_ver)
                goto free_manuf_name;
-       printk(KERN_INFO "GDROM: %s from %s with firmware %s\n",
+       pr_info("%s from %s with firmware %s\n",
                model_name, manuf_name, firmw_ver);
        err = 0;
        kfree(firmw_ver);
@@ -757,7 +769,7 @@ static int __devinit probe_gdrom(struct platform_device *devptr)
        int err;
        /* Start the device */
        if (gdrom_execute_diagnostic() != 1) {
-               printk(KERN_WARNING "GDROM: ATA Probe for GDROM failed.\n");
+               pr_warning("ATA Probe for GDROM failed\n");
                return -ENODEV;
        }
        /* Print out firmware ID */
@@ -767,7 +779,7 @@ static int __devinit probe_gdrom(struct platform_device *devptr)
        gdrom_major = register_blkdev(0, GDROM_DEV_NAME);
        if (gdrom_major <= 0)
                return gdrom_major;
-       printk(KERN_INFO "GDROM: Registered with major number %d\n",
+       pr_info("Registered with major number %d\n",
                gdrom_major);
        /* Specify basic properties of drive */
        gd.cd_info = kzalloc(sizeof(struct cdrom_device_info), GFP_KERNEL);
@@ -818,7 +830,7 @@ probe_fail_no_disk:
        unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
        gdrom_major = 0;
 probe_fail_no_mem:
-       printk(KERN_WARNING "GDROM: Probe failed - error is 0x%X\n", err);
+       pr_warning("Probe failed - error is 0x%X\n", err);
        return err;
 }
 
index 451cd70..56bf9f4 100644 (file)
@@ -31,6 +31,8 @@
  * the OS/400 partition.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
@@ -40,6 +42,7 @@
 #include <linux/module.h>
 #include <linux/completion.h>
 #include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
 
@@ -53,9 +56,6 @@
 
 #define VIOCD_VERS "1.06"
 
-#define VIOCD_KERN_WARNING             KERN_WARNING "viocd: "
-#define VIOCD_KERN_INFO                        KERN_INFO "viocd: "
-
 /*
  * Should probably make this a module parameter....sigh
  */
@@ -154,13 +154,21 @@ static const struct file_operations proc_viocd_operations = {
 static int viocd_blk_open(struct block_device *bdev, fmode_t mode)
 {
        struct disk_info *di = bdev->bd_disk->private_data;
-       return cdrom_open(&di->viocd_info, bdev, mode);
+       int ret;
+
+       lock_kernel();
+       ret = cdrom_open(&di->viocd_info, bdev, mode);
+       unlock_kernel();
+
+       return ret;
 }
 
 static int viocd_blk_release(struct gendisk *disk, fmode_t mode)
 {
        struct disk_info *di = disk->private_data;
+       lock_kernel();
        cdrom_release(&di->viocd_info, mode);
+       unlock_kernel();
        return 0;
 }
 
@@ -168,7 +176,13 @@ static int viocd_blk_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned cmd, unsigned long arg)
 {
        struct disk_info *di = bdev->bd_disk->private_data;
-       return cdrom_ioctl(&di->viocd_info, bdev, mode, cmd, arg);
+       int ret;
+
+       lock_kernel();
+       ret = cdrom_ioctl(&di->viocd_info, bdev, mode, cmd, arg);
+       unlock_kernel();
+
+       return ret;
 }
 
 static int viocd_blk_media_changed(struct gendisk *disk)
@@ -181,7 +195,7 @@ static const struct block_device_operations viocd_fops = {
        .owner =                THIS_MODULE,
        .open =                 viocd_blk_open,
        .release =              viocd_blk_release,
-       .locked_ioctl =         viocd_blk_ioctl,
+       .ioctl =                viocd_blk_ioctl,
        .media_changed =        viocd_blk_media_changed,
 };
 
@@ -202,9 +216,8 @@ static int viocd_open(struct cdrom_device_info *cdi, int purpose)
                        (u64)&we, VIOVERSION << 16, ((u64)device_no << 48),
                        0, 0, 0);
        if (hvrc != 0) {
-               printk(VIOCD_KERN_WARNING
-                               "bad rc on HvCallEvent_signalLpEventFast %d\n",
-                               (int)hvrc);
+               pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+                          (int)hvrc);
                return -EIO;
        }
 
@@ -213,8 +226,8 @@ static int viocd_open(struct cdrom_device_info *cdi, int purpose)
        if (we.rc) {
                const struct vio_error_entry *err =
                        vio_lookup_rc(viocd_err_table, we.sub_result);
-               printk(VIOCD_KERN_WARNING "bad rc %d:0x%04X on open: %s\n",
-                               we.rc, we.sub_result, err->msg);
+               pr_warning("bad rc %d:0x%04X on open: %s\n",
+                          we.rc, we.sub_result, err->msg);
                return -err->errno;
        }
 
@@ -234,9 +247,8 @@ static void viocd_release(struct cdrom_device_info *cdi)
                        viopath_targetinst(viopath_hostLp), 0,
                        VIOVERSION << 16, ((u64)device_no << 48), 0, 0, 0);
        if (hvrc != 0)
-               printk(VIOCD_KERN_WARNING
-                               "bad rc on HvCallEvent_signalLpEventFast %d\n",
-                               (int)hvrc);
+               pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+                          (int)hvrc);
 }
 
 /* Send a read or write request to OS/400 */
@@ -262,13 +274,12 @@ static int send_request(struct request *req)
 
        sg_init_table(&sg, 1);
         if (blk_rq_map_sg(req->q, req, &sg) == 0) {
-               printk(VIOCD_KERN_WARNING
-                               "error setting up scatter/gather list\n");
+               pr_warning("error setting up scatter/gather list\n");
                return -1;
        }
 
        if (dma_map_sg(diskinfo->dev, &sg, 1, direction) == 0) {
-               printk(VIOCD_KERN_WARNING "error allocating sg tce\n");
+               pr_warning("error allocating sg tce\n");
                return -1;
        }
        dmaaddr = sg_dma_address(&sg);
@@ -284,7 +295,7 @@ static int send_request(struct request *req)
                        ((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr,
                        (u64)blk_rq_pos(req) * 512, len, 0);
        if (hvrc != HvLpEvent_Rc_Good) {
-               printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc);
+               pr_warning("hv error on op %d\n", (int)hvrc);
                return -1;
        }
 
@@ -298,11 +309,10 @@ static void do_viocd_request(struct request_queue *q)
        struct request *req;
 
        while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) {
-               if (!blk_fs_request(req))
+               if (req->cmd_type != REQ_TYPE_FS)
                        __blk_end_request_all(req, -EIO);
                else if (send_request(req) < 0) {
-                       printk(VIOCD_KERN_WARNING
-                                       "unable to send message to OS/400!");
+                       pr_warning("unable to send message to OS/400!\n");
                        __blk_end_request_all(req, -EIO);
                } else
                        rwreq++;
@@ -327,8 +337,8 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
                        (u64)&we, VIOVERSION << 16, ((u64)device_no << 48),
                        0, 0, 0);
        if (hvrc != 0) {
-               printk(VIOCD_KERN_WARNING "bad rc on HvCallEvent_signalLpEventFast %d\n",
-                               (int)hvrc);
+               pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+                          (int)hvrc);
                return -EIO;
        }
 
@@ -338,9 +348,8 @@ static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
        if (we.rc) {
                const struct vio_error_entry *err =
                        vio_lookup_rc(viocd_err_table, we.sub_result);
-               printk(VIOCD_KERN_WARNING
-                               "bad rc %d:0x%04X on check_change: %s; Assuming no change\n",
-                               we.rc, we.sub_result, err->msg);
+               pr_warning("bad rc %d:0x%04X on check_change: %s; Assuming no change\n",
+                          we.rc, we.sub_result, err->msg);
                return 0;
        }
 
@@ -367,8 +376,8 @@ static int viocd_lock_door(struct cdrom_device_info *cdi, int locking)
                        (u64)&we, VIOVERSION << 16,
                        (device_no << 48) | (flags << 32), 0, 0, 0);
        if (hvrc != 0) {
-               printk(VIOCD_KERN_WARNING "bad rc on HvCallEvent_signalLpEventFast %d\n",
-                               (int)hvrc);
+               pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+                          (int)hvrc);
                return -EIO;
        }
 
@@ -455,8 +464,7 @@ static void vio_handle_cd_event(struct HvLpEvent *event)
                return;
        /* First, we should NEVER get an int here...only acks */
        if (hvlpevent_is_int(event)) {
-               printk(VIOCD_KERN_WARNING
-                               "Yikes! got an int in viocd event handler!\n");
+               pr_warning("Yikes! got an int in viocd event handler!\n");
                if (hvlpevent_need_ack(event)) {
                        event->xRc = HvLpEvent_Rc_InvalidSubtype;
                        HvCallEvent_ackLpEvent(event);
@@ -510,10 +518,9 @@ return_complete:
                        const struct vio_error_entry *err =
                                vio_lookup_rc(viocd_err_table,
                                                bevent->sub_result);
-                       printk(VIOCD_KERN_WARNING "request %p failed "
-                                       "with rc %d:0x%04X: %s\n",
-                                       req, event->xRc,
-                                       bevent->sub_result, err->msg);
+                       pr_warning("request %p failed with rc %d:0x%04X: %s\n",
+                                  req, event->xRc,
+                                  bevent->sub_result, err->msg);
                        __blk_end_request_all(req, -EIO);
                } else
                        __blk_end_request_all(req, 0);
@@ -524,9 +531,8 @@ return_complete:
                break;
 
        default:
-               printk(VIOCD_KERN_WARNING
-                               "message with invalid subtype %0x04X!\n",
-                               event->xSubtype & VIOMINOR_SUBTYPE_MASK);
+               pr_warning("message with invalid subtype %0x04X!\n",
+                          event->xSubtype & VIOMINOR_SUBTYPE_MASK);
                if (hvlpevent_need_ack(event)) {
                        event->xRc = HvLpEvent_Rc_InvalidSubtype;
                        HvCallEvent_ackLpEvent(event);
@@ -593,23 +599,19 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        sprintf(c->name, VIOCD_DEVICE "%c", 'a' + deviceno);
 
        if (register_cdrom(c) != 0) {
-               printk(VIOCD_KERN_WARNING "Cannot register viocd CD-ROM %s!\n",
-                               c->name);
+               pr_warning("Cannot register viocd CD-ROM %s!\n", c->name);
                goto out;
        }
-       printk(VIOCD_KERN_INFO "cd %s is iSeries resource %10.10s "
-                       "type %4.4s, model %3.3s\n",
-                       c->name, d->rsrcname, d->type, d->model);
+       pr_info("cd %s is iSeries resource %10.10s type %4.4s, model %3.3s\n",
+               c->name, d->rsrcname, d->type, d->model);
        q = blk_init_queue(do_viocd_request, &viocd_reqlock);
        if (q == NULL) {
-               printk(VIOCD_KERN_WARNING "Cannot allocate queue for %s!\n",
-                               c->name);
+               pr_warning("Cannot allocate queue for %s!\n", c->name);
                goto out_unregister_cdrom;
        }
        gendisk = alloc_disk(1);
        if (gendisk == NULL) {
-               printk(VIOCD_KERN_WARNING "Cannot create gendisk for %s!\n",
-                               c->name);
+               pr_warning("Cannot create gendisk for %s!\n", c->name);
                goto out_cleanup_queue;
        }
        gendisk->major = VIOCD_MAJOR;
@@ -682,21 +684,19 @@ static int __init viocd_init(void)
                        return -ENODEV;
        }
 
-       printk(VIOCD_KERN_INFO "vers " VIOCD_VERS ", hosting partition %d\n",
-                       viopath_hostLp);
+       pr_info("vers " VIOCD_VERS ", hosting partition %d\n", viopath_hostLp);
 
        if (register_blkdev(VIOCD_MAJOR, VIOCD_DEVICE) != 0) {
-               printk(VIOCD_KERN_WARNING "Unable to get major %d for %s\n",
-                               VIOCD_MAJOR, VIOCD_DEVICE);
+               pr_warning("Unable to get major %d for %s\n",
+                          VIOCD_MAJOR, VIOCD_DEVICE);
                return -EIO;
        }
 
        ret = viopath_open(viopath_hostLp, viomajorsubtype_cdio,
                        MAX_CD_REQ + 2);
        if (ret) {
-               printk(VIOCD_KERN_WARNING
-                               "error opening path to host partition %d\n",
-                               viopath_hostLp);
+               pr_warning("error opening path to host partition %d\n",
+                          viopath_hostLp);
                goto out_unregister;
        }
 
index f9daffd..e88a2cf 100644 (file)
@@ -190,7 +190,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 
        BUG_ON(sense_len > sizeof(*sense));
 
-       if (blk_sense_request(rq) || drive->sense_rq_armed)
+       if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed)
                return;
 
        memset(sense, 0, sizeof(*sense));
@@ -307,13 +307,16 @@ EXPORT_SYMBOL_GPL(ide_cd_expiry);
 
 int ide_cd_get_xferlen(struct request *rq)
 {
-       if (blk_fs_request(rq))
+       switch (rq->cmd_type) {
+       case REQ_TYPE_FS:
                return 32768;
-       else if (blk_sense_request(rq) || blk_pc_request(rq) ||
-                        rq->cmd_type == REQ_TYPE_ATA_PC)
+       case REQ_TYPE_SENSE:
+       case REQ_TYPE_BLOCK_PC:
+       case REQ_TYPE_ATA_PC:
                return blk_rq_bytes(rq);
-       else
+       default:
                return 0;
+       }
 }
 EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
 
@@ -474,12 +477,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                if (uptodate == 0)
                        drive->failed_pc = NULL;
 
-               if (blk_special_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_SPECIAL) {
                        rq->errors = 0;
                        error = 0;
                } else {
 
-                       if (blk_fs_request(rq) == 0 && uptodate <= 0) {
+                       if (rq->cmd_type != REQ_TYPE_FS && uptodate <= 0) {
                                if (rq->errors == 0)
                                        rq->errors = -EIO;
                        }
index 2de76cc..31fc769 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
@@ -176,7 +177,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
                        if (!sense->valid)
                                break;
                        if (failed_command == NULL ||
-                                       !blk_fs_request(failed_command))
+                           failed_command->cmd_type != REQ_TYPE_FS)
                                break;
                        sector = (sense->information[0] << 24) |
                                 (sense->information[1] << 16) |
@@ -292,7 +293,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                                  "stat 0x%x",
                                  rq->cmd[0], rq->cmd_type, err, stat);
 
-       if (blk_sense_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_SENSE) {
                /*
                 * We got an error trying to get sense info from the drive
                 * (probably while trying to recover from a former error).
@@ -303,7 +304,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
        }
 
        /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-       if (blk_pc_request(rq) && !rq->errors)
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !rq->errors)
                rq->errors = SAM_STAT_CHECK_CONDITION;
 
        if (blk_noretry_request(rq))
@@ -311,13 +312,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
        switch (sense_key) {
        case NOT_READY:
-               if (blk_fs_request(rq) && rq_data_dir(rq) == WRITE) {
+               if (rq->cmd_type == REQ_TYPE_FS && rq_data_dir(rq) == WRITE) {
                        if (ide_cd_breathe(drive, rq))
                                return 1;
                } else {
                        cdrom_saw_media_change(drive);
 
-                       if (blk_fs_request(rq) && !blk_rq_quiet(rq))
+                       if (rq->cmd_type == REQ_TYPE_FS &&
+                           !(rq->cmd_flags & REQ_QUIET))
                                printk(KERN_ERR PFX "%s: tray open\n",
                                        drive->name);
                }
@@ -326,7 +328,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
        case UNIT_ATTENTION:
                cdrom_saw_media_change(drive);
 
-               if (blk_fs_request(rq) == 0)
+               if (rq->cmd_type != REQ_TYPE_FS)
                        return 0;
 
                /*
@@ -352,7 +354,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                 * No point in retrying after an illegal request or data
                 * protect error.
                 */
-               if (!blk_rq_quiet(rq))
+               if (!(rq->cmd_flags & REQ_QUIET))
                        ide_dump_status(drive, "command error", stat);
                do_end_request = 1;
                break;
@@ -361,20 +363,20 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                 * No point in re-trying a zillion times on a bad sector.
                 * If we got here the error is not correctable.
                 */
-               if (!blk_rq_quiet(rq))
+               if (!(rq->cmd_flags & REQ_QUIET))
                        ide_dump_status(drive, "media error "
                                        "(bad sector)", stat);
                do_end_request = 1;
                break;
        case BLANK_CHECK:
                /* disk appears blank? */
-               if (!blk_rq_quiet(rq))
+               if (!(rq->cmd_flags & REQ_QUIET))
                        ide_dump_status(drive, "media error (blank)",
                                        stat);
                do_end_request = 1;
                break;
        default:
-               if (blk_fs_request(rq) == 0)
+               if (rq->cmd_type != REQ_TYPE_FS)
                        break;
                if (err & ~ATA_ABORTED) {
                        /* go to the default handler for other errors */
@@ -385,7 +387,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                        do_end_request = 1;
        }
 
-       if (blk_fs_request(rq) == 0) {
+       if (rq->cmd_type != REQ_TYPE_FS) {
                rq->cmd_flags |= REQ_FAILED;
                do_end_request = 1;
        }
@@ -532,7 +534,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
        ide_expiry_t *expiry = NULL;
        int dma_error = 0, dma, thislen, uptodate = 0;
        int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
-       int sense = blk_sense_request(rq);
+       int sense = (rq->cmd_type == REQ_TYPE_SENSE);
        unsigned int timeout;
        u16 len;
        u8 ireason, stat;
@@ -575,7 +577,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
        ide_read_bcount_and_ireason(drive, &len, &ireason);
 
-       thislen = blk_fs_request(rq) ? len : cmd->nleft;
+       thislen = (rq->cmd_type == REQ_TYPE_FS) ? len : cmd->nleft;
        if (thislen > len)
                thislen = len;
 
@@ -584,7 +586,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
        /* If DRQ is clear, the command has completed. */
        if ((stat & ATA_DRQ) == 0) {
-               if (blk_fs_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_FS) {
                        /*
                         * If we're not done reading/writing, complain.
                         * Otherwise, complete the command normally.
@@ -598,7 +600,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
                                        rq->cmd_flags |= REQ_FAILED;
                                uptodate = 0;
                        }
-               } else if (!blk_pc_request(rq)) {
+               } else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
                        ide_cd_request_sense_fixup(drive, cmd);
 
                        uptodate = cmd->nleft ? 0 : 1;
@@ -647,7 +649,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
        /* pad, if necessary */
        if (len > 0) {
-               if (blk_fs_request(rq) == 0 || write == 0)
+               if (rq->cmd_type != REQ_TYPE_FS || write == 0)
                        ide_pad_transfer(drive, write, len);
                else {
                        printk(KERN_ERR PFX "%s: confused, missing data\n",
@@ -656,11 +658,11 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
                }
        }
 
-       if (blk_pc_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                timeout = rq->timeout;
        } else {
                timeout = ATAPI_WAIT_PC;
-               if (!blk_fs_request(rq))
+               if (rq->cmd_type != REQ_TYPE_FS)
                        expiry = ide_cd_expiry;
        }
 
@@ -669,7 +671,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
        return ide_started;
 
 out_end:
-       if (blk_pc_request(rq) && rc == 0) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC && rc == 0) {
                rq->resid_len = 0;
                blk_end_request_all(rq, 0);
                hwif->rq = NULL;
@@ -677,7 +679,7 @@ out_end:
                if (sense && uptodate)
                        ide_cd_complete_failed_rq(drive, rq);
 
-               if (blk_fs_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_FS) {
                        if (cmd->nleft == 0)
                                uptodate = 1;
                } else {
@@ -690,7 +692,7 @@ out_end:
                                return ide_stopped;
 
                /* make sure it's fully ended */
-               if (blk_fs_request(rq) == 0) {
+               if (rq->cmd_type != REQ_TYPE_FS) {
                        rq->resid_len -= cmd->nbytes - cmd->nleft;
                        if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
                                rq->resid_len += cmd->last_xfer_len;
@@ -750,7 +752,7 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
        ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x",
                                  rq->cmd[0], rq->cmd_type);
 
-       if (blk_pc_request(rq))
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
                rq->cmd_flags |= REQ_QUIET;
        else
                rq->cmd_flags &= ~REQ_FAILED;
@@ -791,21 +793,26 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
        if (drive->debug_mask & IDE_DBG_RQ)
                blk_dump_rq_flags(rq, "ide_cd_do_request");
 
-       if (blk_fs_request(rq)) {
+       switch (rq->cmd_type) {
+       case REQ_TYPE_FS:
                if (cdrom_start_rw(drive, rq) == ide_stopped)
                        goto out_end;
-       } else if (blk_sense_request(rq) || blk_pc_request(rq) ||
-                  rq->cmd_type == REQ_TYPE_ATA_PC) {
+               break;
+       case REQ_TYPE_SENSE:
+       case REQ_TYPE_BLOCK_PC:
+       case REQ_TYPE_ATA_PC:
                if (!rq->timeout)
                        rq->timeout = ATAPI_WAIT_PC;
 
                cdrom_do_block_pc(drive, rq);
-       } else if (blk_special_request(rq)) {
+               break;
+       case REQ_TYPE_SPECIAL:
                /* right now this can only be a reset... */
                uptodate = 1;
                goto out_end;
-       } else
+       default:
                BUG();
+       }
 
        /* prepare sense request for this command */
        ide_prep_sense(drive, rq);
@@ -817,7 +824,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 
        cmd.rq = rq;
 
-       if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+       if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) {
                ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
                ide_map_sg(drive, &cmd);
        }
@@ -1373,9 +1380,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
 
 static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq)
 {
-       if (blk_fs_request(rq))
+       if (rq->cmd_type == REQ_TYPE_FS)
                return ide_cdrom_prep_fs(q, rq);
-       else if (blk_pc_request(rq))
+       else if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
                return ide_cdrom_prep_pc(rq);
 
        return 0;
@@ -1592,17 +1599,19 @@ static struct ide_driver ide_cdrom_driver = {
 
 static int idecd_open(struct block_device *bdev, fmode_t mode)
 {
-       struct cdrom_info *info = ide_cd_get(bdev->bd_disk);
-       int rc = -ENOMEM;
+       struct cdrom_info *info;
+       int rc = -ENXIO;
 
+       lock_kernel();
+       info = ide_cd_get(bdev->bd_disk);
        if (!info)
-               return -ENXIO;
+               goto out;
 
        rc = cdrom_open(&info->devinfo, bdev, mode);
-
        if (rc < 0)
                ide_cd_put(info);
-
+out:
+       unlock_kernel();
        return rc;
 }
 
@@ -1610,9 +1619,11 @@ static int idecd_release(struct gendisk *disk, fmode_t mode)
 {
        struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
 
+       lock_kernel();
        cdrom_release(&info->devinfo, mode);
 
        ide_cd_put(info);
+       unlock_kernel();
 
        return 0;
 }
@@ -1656,7 +1667,7 @@ static int idecd_get_spindown(struct cdrom_device_info *cdi, unsigned long arg)
        return 0;
 }
 
-static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
+static int idecd_locked_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
 {
        struct cdrom_info *info = ide_drv_g(bdev->bd_disk, cdrom_info);
@@ -1678,6 +1689,19 @@ static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
        return err;
 }
 
+static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
+                            unsigned int cmd, unsigned long arg)
+{
+       int ret;
+
+       lock_kernel();
+       ret = idecd_locked_ioctl(bdev, mode, cmd, arg);
+       unlock_kernel();
+
+       return ret;
+}
+
+
 static int idecd_media_changed(struct gendisk *disk)
 {
        struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
@@ -1698,7 +1722,7 @@ static const struct block_device_operations idecd_ops = {
        .owner                  = THIS_MODULE,
        .open                   = idecd_open,
        .release                = idecd_release,
-       .locked_ioctl           = idecd_ioctl,
+       .ioctl                  = idecd_ioctl,
        .media_changed          = idecd_media_changed,
        .revalidate_disk        = idecd_revalidate_disk
 };
index 02712bf..766b3de 100644 (file)
@@ -454,7 +454,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
           touch it at all. */
 
        if (cgc->data_direction == CGC_DATA_WRITE)
-               flags |= REQ_RW;
+               flags |= REQ_WRITE;
 
        if (cgc->sense)
                memset(cgc->sense, 0, sizeof(struct request_sense));
index 33d6503..7433e07 100644 (file)
@@ -184,7 +184,7 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
        ide_hwif_t *hwif = drive->hwif;
 
        BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
-       BUG_ON(!blk_fs_request(rq));
+       BUG_ON(rq->cmd_type != REQ_TYPE_FS);
 
        ledtrig_ide_activity();
 
@@ -427,10 +427,15 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive)
                drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */
 }
 
-static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
+static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
 {
        ide_drive_t *drive = q->queuedata;
-       struct ide_cmd *cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+       struct ide_cmd *cmd;
+
+       if (!(rq->cmd_flags & REQ_FLUSH))
+               return BLKPREP_OK;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
 
        /* FIXME: map struct ide_taskfile on rq->cmd[] */
        BUG_ON(cmd == NULL);
@@ -448,6 +453,8 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
        rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
        rq->special = cmd;
        cmd->rq = rq;
+
+       return BLKPREP_OK;
 }
 
 ide_devset_get(multcount, mult_count);
@@ -513,7 +520,6 @@ static void update_ordered(ide_drive_t *drive)
 {
        u16 *id = drive->id;
        unsigned ordered = QUEUE_ORDERED_NONE;
-       prepare_flush_fn *prep_fn = NULL;
 
        if (drive->dev_flags & IDE_DFLAG_WCACHE) {
                unsigned long long capacity;
@@ -538,12 +544,12 @@ static void update_ordered(ide_drive_t *drive)
 
                if (barrier) {
                        ordered = QUEUE_ORDERED_DRAIN_FLUSH;
-                       prep_fn = idedisk_prepare_flush;
+                       blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
                }
        } else
                ordered = QUEUE_ORDERED_DRAIN;
 
-       blk_queue_ordered(drive->queue, ordered, prep_fn);
+       blk_queue_ordered(drive->queue, ordered);
 }
 
 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
index 7b783dd..ec94c66 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 
 #include "ide-disk.h"
 
@@ -18,9 +19,13 @@ int ide_disk_ioctl(ide_drive_t *drive, struct block_device *bdev, fmode_t mode,
 {
        int err;
 
+       lock_kernel();
        err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
        if (err != -EOPNOTSUPP)
-               return err;
+               goto out;
 
-       return generic_ide_ioctl(drive, bdev, cmd, arg);
+       err = generic_ide_ioctl(drive, bdev, cmd, arg);
+out:
+       unlock_kernel();
+       return err;
 }
index e9abf2c..c0aa93f 100644 (file)
@@ -122,7 +122,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
                return ide_stopped;
 
        /* retry only "normal" I/O: */
-       if (!blk_fs_request(rq)) {
+       if (rq->cmd_type != REQ_TYPE_FS) {
                if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
                        struct ide_cmd *cmd = rq->special;
 
@@ -146,7 +146,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 {
        struct request *rq = drive->hwif->rq;
 
-       if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) {
+       if (rq && rq->cmd_type == REQ_TYPE_SPECIAL &&
+           rq->cmd[0] == REQ_DRIVE_RESET) {
                if (err <= 0 && rq->errors == 0)
                        rq->errors = -EIO;
                ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
index 4713bdc..5406b6e 100644 (file)
@@ -73,7 +73,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
                drive->failed_pc = NULL;
 
        if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
-           (rq && blk_pc_request(rq)))
+           (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC))
                uptodate = 1; /* FIXME */
        else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
 
@@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
                               "Aborting request!\n");
        }
 
-       if (blk_special_request(rq))
+       if (rq->cmd_type == REQ_TYPE_SPECIAL)
                rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
 
        return uptodate;
@@ -207,7 +207,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
        memcpy(rq->cmd, pc->c, 12);
 
        pc->rq = rq;
-       if (rq->cmd_flags & REQ_RW)
+       if (rq->cmd_flags & REQ_WRITE)
                pc->flags |= PC_FLAG_WRITING;
 
        pc->flags |= PC_FLAG_DMA_OK;
@@ -247,14 +247,16 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                } else
                        printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-               if (blk_special_request(rq)) {
+               if (rq->cmd_type == REQ_TYPE_SPECIAL) {
                        rq->errors = 0;
                        ide_complete_rq(drive, 0, blk_rq_bytes(rq));
                        return ide_stopped;
                } else
                        goto out_end;
        }
-       if (blk_fs_request(rq)) {
+
+       switch (rq->cmd_type) {
+       case REQ_TYPE_FS:
                if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
                    (blk_rq_sectors(rq) % floppy->bs_factor)) {
                        printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
@@ -263,13 +265,18 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                }
                pc = &floppy->queued_pc;
                idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-       } else if (blk_special_request(rq) || blk_sense_request(rq)) {
+               break;
+       case REQ_TYPE_SPECIAL:
+       case REQ_TYPE_SENSE:
                pc = (struct ide_atapi_pc *)rq->special;
-       } else if (blk_pc_request(rq)) {
+               break;
+       case REQ_TYPE_BLOCK_PC:
                pc = &floppy->queued_pc;
                idefloppy_blockpc_cmd(floppy, pc, rq);
-       } else
+               break;
+       default:
                BUG();
+       }
 
        ide_prep_sense(drive, rq);
 
@@ -280,7 +287,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 
        cmd.rq = rq;
 
-       if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+       if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) {
                ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
                ide_map_sg(drive, &cmd);
        }
@@ -290,7 +297,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
        return ide_floppy_issue_pc(drive, &cmd, pc);
 out_end:
        drive->failed_pc = NULL;
-       if (blk_fs_request(rq) == 0 && rq->errors == 0)
+       if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0)
                rq->errors = -EIO;
        ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
        return ide_stopped;
index 9c22882..fd3d05a 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 #include <linux/cdrom.h>
+#include <linux/smp_lock.h>
 
 #include <asm/unaligned.h>
 
@@ -275,12 +276,15 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
        void __user *argp = (void __user *)arg;
        int err;
 
-       if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR)
-               return ide_floppy_lockdoor(drive, &pc, arg, cmd);
+       lock_kernel();
+       if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) {
+               err = ide_floppy_lockdoor(drive, &pc, arg, cmd);
+               goto out;
+       }
 
        err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
        if (err != -ENOTTY)
-               return err;
+               goto out;
 
        /*
         * skip SCSI_IOCTL_SEND_COMMAND (deprecated)
@@ -293,5 +297,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
        if (err == -ENOTTY)
                err = generic_ide_ioctl(drive, bdev, cmd, arg);
 
+out:
+       unlock_kernel();
        return err;
 }
index 7939953..70aeeb1 100644 (file)
@@ -1,3 +1,4 @@
+#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -237,6 +238,18 @@ out_put_idkp:
        return ret;
 }
 
+static int ide_gd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+       int ret;
+
+       lock_kernel();
+       ret = ide_gd_open(bdev, mode);
+       unlock_kernel();
+
+       return ret;
+}
+
+
 static int ide_gd_release(struct gendisk *disk, fmode_t mode)
 {
        struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -244,6 +257,7 @@ static int ide_gd_release(struct gendisk *disk, fmode_t mode)
 
        ide_debug_log(IDE_DBG_FUNC, "enter");
 
+       lock_kernel();
        if (idkp->openers == 1)
                drive->disk_ops->flush(drive);
 
@@ -255,6 +269,7 @@ static int ide_gd_release(struct gendisk *disk, fmode_t mode)
        idkp->openers--;
 
        ide_disk_put(idkp);
+       unlock_kernel();
 
        return 0;
 }
@@ -321,9 +336,9 @@ static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode,
 
 static const struct block_device_operations ide_gd_ops = {
        .owner                  = THIS_MODULE,
-       .open                   = ide_gd_open,
+       .open                   = ide_gd_unlocked_open,
        .release                = ide_gd_release,
-       .locked_ioctl           = ide_gd_ioctl,
+       .ioctl                  = ide_gd_ioctl,
        .getgeo                 = ide_gd_getgeo,
        .media_changed          = ide_gd_media_changed,
        .unlock_native_capacity = ide_gd_unlock_native_capacity,
index 172ac92..a381be8 100644 (file)
@@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq);
 
 void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 {
-       u8 drv_req = blk_special_request(rq) && rq->rq_disk;
+       u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk;
        u8 media = drive->media;
 
        drive->failed_pc = NULL;
@@ -145,7 +145,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
        } else {
                if (media == ide_tape)
                        rq->errors = IDE_DRV_ERROR_GENERAL;
-               else if (blk_fs_request(rq) == 0 && rq->errors == 0)
+               else if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0)
                        rq->errors = -EIO;
        }
 
@@ -307,7 +307,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 {
        ide_startstop_t startstop;
 
-       BUG_ON(!blk_rq_started(rq));
+       BUG_ON(!(rq->cmd_flags & REQ_STARTED));
 
 #ifdef DEBUG
        printk("%s: start_request: current=0x%08lx\n",
@@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
                            pm->pm_step == IDE_PM_COMPLETED)
                                ide_complete_pm_rq(drive, rq);
                        return startstop;
-               } else if (!rq->rq_disk && blk_special_request(rq))
+               } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL)
                        /*
                         * TODO: Once all ULDs have been modified to
                         * check for specific op codes rather than
index 1c08311..9240609 100644 (file)
@@ -191,10 +191,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 #ifdef DEBUG_PM
        printk("%s: completing PM request, %s\n", drive->name,
-              blk_pm_suspend_request(rq) ? "suspend" : "resume");
+              (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume");
 #endif
        spin_lock_irqsave(q->queue_lock, flags);
-       if (blk_pm_suspend_request(rq))
+       if (rq->cmd_type == REQ_TYPE_PM_SUSPEND)
                blk_stop_queue(q);
        else
                drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
@@ -210,11 +210,11 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
        struct request_pm_state *pm = rq->special;
 
-       if (blk_pm_suspend_request(rq) &&
+       if (rq->cmd_type == REQ_TYPE_PM_SUSPEND &&
            pm->pm_step == IDE_PM_START_SUSPEND)
                /* Mark drive blocked when starting the suspend sequence. */
                drive->dev_flags |= IDE_DFLAG_BLOCKED;
-       else if (blk_pm_resume_request(rq) &&
+       else if (rq->cmd_type == REQ_TYPE_PM_RESUME &&
                 pm->pm_step == IDE_PM_START_RESUME) {
                /*
                 * The first thing we do on wakeup is to wait for BSY bit to
index b072328..6d622cb 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/errno.h>
 #include <linux/genhd.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/ide.h>
@@ -577,7 +578,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
                      rq->cmd[0], (unsigned long long)blk_rq_pos(rq),
                      blk_rq_sectors(rq));
 
-       BUG_ON(!(blk_special_request(rq) || blk_sense_request(rq)));
+       BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL ||
+                rq->cmd_type == REQ_TYPE_SENSE));
 
        /* Retry a failed packet command */
        if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
@@ -1905,7 +1907,11 @@ static const struct file_operations idetape_fops = {
 
 static int idetape_open(struct block_device *bdev, fmode_t mode)
 {
-       struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
+       struct ide_tape_obj *tape;
+
+       lock_kernel();
+       tape = ide_tape_get(bdev->bd_disk, false, 0);
+       unlock_kernel();
 
        if (!tape)
                return -ENXIO;
@@ -1917,7 +1923,10 @@ static int idetape_release(struct gendisk *disk, fmode_t mode)
 {
        struct ide_tape_obj *tape = ide_drv_g(disk, ide_tape_obj);
 
+       lock_kernel();
        ide_tape_put(tape);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -1926,9 +1935,14 @@ static int idetape_ioctl(struct block_device *bdev, fmode_t mode,
 {
        struct ide_tape_obj *tape = ide_drv_g(bdev->bd_disk, ide_tape_obj);
        ide_drive_t *drive = tape->drive;
-       int err = generic_ide_ioctl(drive, bdev, cmd, arg);
+       int err;
+
+       lock_kernel();
+       err = generic_ide_ioctl(drive, bdev, cmd, arg);
        if (err == -EINVAL)
                err = idetape_blkdev_ioctl(drive, cmd, arg);
+       unlock_kernel();
+
        return err;
 }
 
@@ -1936,7 +1950,7 @@ static const struct block_device_operations idetape_block_ops = {
        .owner          = THIS_MODULE,
        .open           = idetape_open,
        .release        = idetape_release,
-       .locked_ioctl   = idetape_ioctl,
+       .ioctl          = idetape_ioctl,
 };
 
 static int ide_tape_probe(ide_drive_t *drive)
index 10f457c..0590c75 100644 (file)
@@ -356,7 +356,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
        BUG_ON(num_regions > DM_IO_MAX_REGIONS);
 
        if (sync)
-               rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+               rw |= REQ_SYNC | REQ_UNPLUG;
 
        /*
         * For multiple regions we need to be careful to rewind
@@ -364,7 +364,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
         */
        for (i = 0; i < num_regions; i++) {
                *dp = old_pages;
-               if (where[i].count || (rw & (1 << BIO_RW_BARRIER)))
+               if (where[i].count || (rw & REQ_HARDBARRIER))
                        do_region(rw, i, where + i, dp, io);
        }
 
@@ -412,8 +412,8 @@ retry:
        }
        set_current_state(TASK_RUNNING);
 
-       if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
-               rw &= ~(1 << BIO_RW_BARRIER);
+       if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
+               rw &= ~REQ_HARDBARRIER;
                goto retry;
        }
 
@@ -479,8 +479,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
  * New collapsed (a)synchronous interface.
  *
  * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
- * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in
- * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
+ * the queue with blk_unplug() some time later or set REQ_SYNC in
+ * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
  * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
  */
 int dm_io(struct dm_io_request *io_req, unsigned num_regions,
index addf834..d8587ba 100644 (file)
@@ -345,7 +345,7 @@ static int run_io_job(struct kcopyd_job *job)
 {
        int r;
        struct dm_io_request io_req = {
-               .bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG),
+               .bi_rw = job->rw | REQ_SYNC | REQ_UNPLUG,
                .mem.type = DM_IO_PAGE_LIST,
                .mem.ptr.pl = job->pages,
                .mem.offset = job->offset,
index ddda531..7413626 100644 (file)
@@ -1211,7 +1211,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
        if (error == -EOPNOTSUPP)
                goto out;
 
-       if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
+       if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
                goto out;
 
        if (unlikely(error)) {
index e610725..d6e28d7 100644 (file)
@@ -284,7 +284,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
        if (!error)
                return 0; /* I/O complete */
 
-       if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
+       if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
                return error;
 
        if (error == -EOPNOTSUPP)
index d21e128..a3f21dc 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
@@ -338,6 +339,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 {
        struct mapped_device *md;
 
+       lock_kernel();
        spin_lock(&_minor_lock);
 
        md = bdev->bd_disk->private_data;
@@ -355,6 +357,7 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 
 out:
        spin_unlock(&_minor_lock);
+       unlock_kernel();
 
        return md ? 0 : -ENXIO;
 }
@@ -362,8 +365,12 @@ out:
 static int dm_blk_close(struct gendisk *disk, fmode_t mode)
 {
        struct mapped_device *md = disk->private_data;
+
+       lock_kernel();
        atomic_dec(&md->open_count);
        dm_put(md);
+       unlock_kernel();
+
        return 0;
 }
 
@@ -614,7 +621,7 @@ static void dec_pending(struct dm_io *io, int error)
                         */
                        spin_lock_irqsave(&md->deferred_lock, flags);
                        if (__noflush_suspending(md)) {
-                               if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
+                               if (!(io->bio->bi_rw & REQ_HARDBARRIER))
                                        bio_list_add_head(&md->deferred,
                                                          io->bio);
                        } else
@@ -626,7 +633,7 @@ static void dec_pending(struct dm_io *io, int error)
                io_error = io->error;
                bio = io->bio;
 
-               if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+               if (bio->bi_rw & REQ_HARDBARRIER) {
                        /*
                         * There can be just one barrier request so we use
                         * a per-device variable for error reporting.
@@ -792,12 +799,12 @@ static void dm_end_request(struct request *clone, int error)
 {
        int rw = rq_data_dir(clone);
        int run_queue = 1;
-       bool is_barrier = blk_barrier_rq(clone);
+       bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
 
-       if (blk_pc_request(rq) && !is_barrier) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
                rq->errors = clone->errors;
                rq->resid_len = clone->resid_len;
 
@@ -844,7 +851,7 @@ void dm_requeue_unmapped_request(struct request *clone)
        struct request_queue *q = rq->q;
        unsigned long flags;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.
                 * Leave it to dm_end_request(), which handles this special
@@ -943,7 +950,7 @@ static void dm_complete_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.  So can't use
                 * softirq_done with the original.
@@ -972,7 +979,7 @@ void dm_kill_unmapped_request(struct request *clone, int error)
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct request *rq = tio->orig;
 
-       if (unlikely(blk_barrier_rq(clone))) {
+       if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
                /*
                 * Barrier clones share an original request.
                 * Leave it to dm_end_request(), which handles this special
@@ -1106,7 +1113,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
        clone->bi_sector = sector;
        clone->bi_bdev = bio->bi_bdev;
-       clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
+       clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
        clone->bi_vcnt = 1;
        clone->bi_size = to_bytes(len);
        clone->bi_io_vec->bv_offset = offset;
@@ -1133,7 +1140,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
        clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
        __bio_clone(clone, bio);
-       clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
+       clone->bi_rw &= ~REQ_HARDBARRIER;
        clone->bi_destructor = dm_bio_destructor;
        clone->bi_sector = sector;
        clone->bi_idx = idx;
@@ -1301,7 +1308,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
        ci.map = dm_get_live_table(md);
        if (unlikely(!ci.map)) {
-               if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
+               if (!(bio->bi_rw & REQ_HARDBARRIER))
                        bio_io_error(bio);
                else
                        if (!md->barrier_error)
@@ -1414,7 +1421,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
         * we have to queue this io for later.
         */
        if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-           unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+           unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                up_read(&md->io_lock);
 
                if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1455,20 +1462,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
        return _dm_request(q, bio);
 }
 
-/*
- * Mark this request as flush request, so that dm_request_fn() can
- * recognize.
- */
-static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
-{
-       rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
-       rq->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 static bool dm_rq_is_flush_request(struct request *rq)
 {
-       if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-           rq->cmd[0] == REQ_LB_OP_FLUSH)
+       if (rq->cmd_flags & REQ_FLUSH)
                return true;
        else
                return false;
@@ -1912,8 +1908,7 @@ static struct mapped_device *alloc_dev(int minor)
        blk_queue_softirq_done(md->queue, dm_softirq_done);
        blk_queue_prep_rq(md->queue, dm_prep_fn);
        blk_queue_lld_busy(md->queue, dm_lld_busy);
-       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
-                         dm_rq_prepare_flush);
+       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
        md->disk = alloc_disk(1);
        if (!md->disk)
@@ -2296,7 +2291,7 @@ static void dm_wq_work(struct work_struct *work)
                if (dm_request_based(md))
                        generic_make_request(c);
                else {
-                       if (bio_rw_flagged(c, BIO_RW_BARRIER))
+                       if (c->bi_rw & REQ_HARDBARRIER)
                                process_barrier(md, c);
                        else
                                __split_and_process_bio(md, c);
index 7e0e057..ba19060 100644 (file)
@@ -294,7 +294,7 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio)
        dev_info_t *tmp_dev;
        sector_t start_sector;
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+       if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }
index cb20d0b..700c96e 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
 #include <linux/ctype.h>
@@ -353,7 +354,7 @@ static void md_submit_barrier(struct work_struct *ws)
                /* an empty barrier - all done */
                bio_endio(bio, 0);
        else {
-               bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
+               bio->bi_rw &= ~REQ_HARDBARRIER;
                if (mddev->pers->make_request(mddev, bio))
                        generic_make_request(bio);
                mddev->barrier = POST_REQUEST_BARRIER;
@@ -675,11 +676,11 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
         * if zero is reached.
         * If an error occurred, call md_error
         *
-        * As we might need to resubmit the request if BIO_RW_BARRIER
+        * As we might need to resubmit the request if REQ_HARDBARRIER
         * causes ENOTSUPP, we allocate a spare bio...
         */
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
-       int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+       int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
 
        bio->bi_bdev = rdev->bdev;
        bio->bi_sector = sector;
@@ -691,7 +692,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
        atomic_inc(&mddev->pending_writes);
        if (!test_bit(BarriersNotsupp, &rdev->flags)) {
                struct bio *rbio;
-               rw |= (1<<BIO_RW_BARRIER);
+               rw |= REQ_HARDBARRIER;
                rbio = bio_clone(bio, GFP_NOIO);
                rbio->bi_private = bio;
                rbio->bi_end_io = super_written_barrier;
@@ -736,7 +737,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
        struct completion event;
        int ret;
 
-       rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+       rw |= REQ_SYNC | REQ_UNPLUG;
 
        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
@@ -5902,6 +5903,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        mddev_t *mddev = mddev_find(bdev->bd_dev);
        int err;
 
+       lock_kernel();
        if (mddev->gendisk != bdev->bd_disk) {
                /* we are racing with mddev_put which is discarding this
                 * bd_disk.
@@ -5910,6 +5912,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
                /* Wait until bdev->bd_disk is definitely gone */
                flush_scheduled_work();
                /* Then retry the open from the top */
+               unlock_kernel();
                return -ERESTARTSYS;
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5923,6 +5926,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
        check_disk_size_change(mddev->gendisk, bdev);
  out:
+       unlock_kernel();
        return err;
 }
 
@@ -5931,8 +5935,10 @@ static int md_release(struct gendisk *disk, fmode_t mode)
        mddev_t *mddev = disk->private_data;
 
        BUG_ON(!mddev);
+       lock_kernel();
        atomic_dec(&mddev->openers);
        mddev_put(mddev);
+       unlock_kernel();
 
        return 0;
 }
index 10597bf..fc56e0f 100644 (file)
@@ -67,7 +67,7 @@ struct mdk_rdev_s
 #define        Faulty          1               /* device is known to have a fault */
 #define        In_sync         2               /* device is in_sync with rest of array */
 #define        WriteMostly     4               /* Avoid reading if at all possible */
-#define        BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */
+#define        BarriersNotsupp 5               /* REQ_HARDBARRIER is not supported */
 #define        AllReserved     6               /* If whole device is reserved for
                                         * one array */
 #define        AutoDetected    7               /* added by auto-detect */
@@ -254,7 +254,7 @@ struct mddev_s
                                                         * fails.  Only supported
                                                         */
        struct bio                      *biolist;       /* bios that need to be retried
-                                                        * because BIO_RW_BARRIER is not supported
+                                                        * because REQ_HARDBARRIER is not supported
                                                         */
 
        atomic_t                        recovery_active; /* blocks scheduled, but not written */
index 410fb60..0307d21 100644 (file)
@@ -91,7 +91,7 @@ static void multipath_end_request(struct bio *bio, int error)
 
        if (uptodate)
                multipath_end_bh_io(mp_bh, 0);
-       else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) {
+       else if (!(bio->bi_rw & REQ_RAHEAD)) {
                /*
                 * oops, IO error:
                 */
@@ -142,7 +142,7 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio)
        struct multipath_bh * mp_bh;
        struct multipath_info *multipath;
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+       if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }
@@ -163,7 +163,7 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio)
        mp_bh->bio = *bio;
        mp_bh->bio.bi_sector += multipath->rdev->data_offset;
        mp_bh->bio.bi_bdev = multipath->rdev->bdev;
-       mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
+       mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
        mp_bh->bio.bi_end_io = multipath_end_request;
        mp_bh->bio.bi_private = mp_bh;
        generic_make_request(&mp_bh->bio);
@@ -398,7 +398,7 @@ static void multipathd (mddev_t *mddev)
                        *bio = *(mp_bh->master_bio);
                        bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset;
                        bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
-                       bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
+                       bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
                        bio->bi_end_io = multipath_end_request;
                        bio->bi_private = mp_bh;
                        generic_make_request(bio);
index 563abed..6f7af46 100644 (file)
@@ -483,7 +483,7 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio)
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+       if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }
index a948da8..73cc74f 100644 (file)
@@ -787,7 +787,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        struct bio_list bl;
        struct page **behind_pages = NULL;
        const int rw = bio_data_dir(bio);
-       const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
-       bool do_barriers;
+       const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
+       unsigned long do_barriers;
        mdk_rdev_t *blocked_rdev;
 
@@ -822,7 +822,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                finish_wait(&conf->wait_barrier, &w);
        }
        if (unlikely(!mddev->barriers_work &&
-                    bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+                    (bio->bi_rw & REQ_HARDBARRIER))) {
                if (rw == WRITE)
                        md_write_end(mddev);
                bio_endio(bio, -EOPNOTSUPP);
@@ -877,7 +877,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid1_end_read_request;
-               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+               read_bio->bi_rw = READ | do_sync;
                read_bio->bi_private = r1_bio;
 
                generic_make_request(read_bio);
@@ -959,7 +959,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        atomic_set(&r1_bio->remaining, 0);
        atomic_set(&r1_bio->behind_remaining, 0);
 
-       do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER);
+       do_barriers = bio->bi_rw & REQ_HARDBARRIER;
        if (do_barriers)
                set_bit(R1BIO_Barrier, &r1_bio->state);
 
@@ -975,8 +975,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
                mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
-                       (do_sync << BIO_RW_SYNCIO);
+               mbio->bi_rw = WRITE | do_barriers | do_sync;
                mbio->bi_private = r1_bio;
 
                if (behind_pages) {
@@ -1633,7 +1632,7 @@ static void raid1d(mddev_t *mddev)
                        sync_request_write(mddev, r1_bio);
                        unplug = 1;
                } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
-                       /* some requests in the r1bio were BIO_RW_BARRIER
+                       /* some requests in the r1bio were REQ_HARDBARRIER
                         * requests which failed with -EOPNOTSUPP.  Hohumm..
                         * Better resubmit without the barrier.
                         * We know which devices to resubmit for, because
@@ -1641,7 +1640,7 @@ static void raid1d(mddev_t *mddev)
                         * We already have a nr_pending reference on these rdevs.
                         */
                        int i;
-                       const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
+                       const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
                        clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
                        clear_bit(R1BIO_Barrier, &r1_bio->state);
                        for (i=0; i < conf->raid_disks; i++)
@@ -1662,8 +1661,7 @@ static void raid1d(mddev_t *mddev)
                                                conf->mirrors[i].rdev->data_offset;
                                        bio->bi_bdev = conf->mirrors[i].rdev->bdev;
                                        bio->bi_end_io = raid1_end_write_request;
-                                       bio->bi_rw = WRITE |
-                                               (do_sync << BIO_RW_SYNCIO);
+                                       bio->bi_rw = WRITE | do_sync;
                                        bio->bi_private = r1_bio;
                                        r1_bio->bios[i] = bio;
                                        generic_make_request(bio);
@@ -1698,7 +1696,7 @@ static void raid1d(mddev_t *mddev)
                                       (unsigned long long)r1_bio->sector);
                                raid_end_bio_io(r1_bio);
                        } else {
-                               const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
+                               const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
                                r1_bio->bios[r1_bio->read_disk] =
                                        mddev->ro ? IO_BLOCKED : NULL;
                                r1_bio->read_disk = disk;
@@ -1715,7 +1713,7 @@ static void raid1d(mddev_t *mddev)
                                bio->bi_sector = r1_bio->sector + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
                                bio->bi_end_io = raid1_end_read_request;
-                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+                               bio->bi_rw = READ | do_sync;
                                bio->bi_private = r1_bio;
                                unplug = 1;
                                generic_make_request(bio);
index 42e64e4..62ecb66 100644 (file)
@@ -799,12 +799,12 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        int i;
        int chunk_sects = conf->chunk_mask + 1;
        const int rw = bio_data_dir(bio);
-       const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+       const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        struct bio_list bl;
        unsigned long flags;
        mdk_rdev_t *blocked_rdev;
 
-       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+       if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }
@@ -879,7 +879,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                        mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid10_end_read_request;
-               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+               read_bio->bi_rw = READ | do_sync;
                read_bio->bi_private = r10_bio;
 
                generic_make_request(read_bio);
@@ -947,7 +947,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                        conf->mirrors[d].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
+               mbio->bi_rw = WRITE | do_sync;
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
@@ -1716,7 +1716,7 @@ static void raid10d(mddev_t *mddev)
                                raid_end_bio_io(r10_bio);
                                bio_put(bio);
                        } else {
-                               const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
+                               const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
                                bio_put(bio);
                                rdev = conf->mirrors[mirror].rdev;
                                if (printk_ratelimit())
@@ -1730,7 +1730,7 @@ static void raid10d(mddev_t *mddev)
                                bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
                                        + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
-                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+                               bio->bi_rw = READ | do_sync;
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = raid10_end_read_request;
                                unplug = 1;
index 96c6902..20ac2f1 100644 (file)
@@ -3958,7 +3958,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
        const int rw = bio_data_dir(bi);
        int remaining;
 
-       if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
+       if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) {
                /* Drain all pending writes.  We only really need
                 * to ensure they have been submitted, but this is
                 * easier.
index 8327e24..eef78a0 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/memstick.h>
 
 #define DRIVER_NAME "mspro_block"
@@ -179,6 +180,7 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
        struct mspro_block_data *msb = disk->private_data;
        int rc = -ENXIO;
 
+       lock_kernel();
        mutex_lock(&mspro_block_disk_lock);
 
        if (msb && msb->card) {
@@ -190,6 +192,7 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
        }
 
        mutex_unlock(&mspro_block_disk_lock);
+       unlock_kernel();
 
        return rc;
 }
@@ -221,7 +224,11 @@ static int mspro_block_disk_release(struct gendisk *disk)
 
 static int mspro_block_bd_release(struct gendisk *disk, fmode_t mode)
 {
-       return mspro_block_disk_release(disk);
+       int ret;
+       lock_kernel();
+       ret = mspro_block_disk_release(disk);
+       unlock_kernel();
+       return ret;
 }
 
 static int mspro_block_bd_getgeo(struct block_device *bdev,
@@ -805,7 +812,8 @@ static void mspro_block_start(struct memstick_dev *card)
 
 static int mspro_block_prepare_req(struct request_queue *q, struct request *req)
 {
-       if (!blk_fs_request(req) && !blk_pc_request(req)) {
+       if (req->cmd_type != REQ_TYPE_FS &&
+           req->cmd_type != REQ_TYPE_BLOCK_PC) {
                blk_dump_rq_flags(req, "MSPro unsupported request");
                return BLKPREP_KILL;
        }
index fc593fb..e6733bc 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/i2o.h>
+#include <linux/smp_lock.h>
 
 #include <linux/mempool.h>
 
@@ -577,6 +578,7 @@ static int i2o_block_open(struct block_device *bdev, fmode_t mode)
        if (!dev->i2o_dev)
                return -ENODEV;
 
+       lock_kernel();
        if (dev->power > 0x1f)
                i2o_block_device_power(dev, 0x02);
 
@@ -585,6 +587,7 @@ static int i2o_block_open(struct block_device *bdev, fmode_t mode)
        i2o_block_device_lock(dev->i2o_dev, -1);
 
        osm_debug("Ready.\n");
+       unlock_kernel();
 
        return 0;
 };
@@ -615,6 +618,7 @@ static int i2o_block_release(struct gendisk *disk, fmode_t mode)
        if (!dev->i2o_dev)
                return 0;
 
+       lock_kernel();
        i2o_block_device_flush(dev->i2o_dev);
 
        i2o_block_device_unlock(dev->i2o_dev, -1);
@@ -625,6 +629,7 @@ static int i2o_block_release(struct gendisk *disk, fmode_t mode)
                operation = 0x24;
 
        i2o_block_device_power(dev, operation);
+       unlock_kernel();
 
        return 0;
 }
@@ -652,30 +657,40 @@ static int i2o_block_ioctl(struct block_device *bdev, fmode_t mode,
 {
        struct gendisk *disk = bdev->bd_disk;
        struct i2o_block_device *dev = disk->private_data;
+       int ret = -ENOTTY;
 
        /* Anyone capable of this syscall can do *real bad* things */
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       lock_kernel();
        switch (cmd) {
        case BLKI2OGRSTRAT:
-               return put_user(dev->rcache, (int __user *)arg);
+               ret = put_user(dev->rcache, (int __user *)arg);
+               break;
        case BLKI2OGWSTRAT:
-               return put_user(dev->wcache, (int __user *)arg);
+               ret = put_user(dev->wcache, (int __user *)arg);
+               break;
        case BLKI2OSRSTRAT:
+               ret = -EINVAL;
                if (arg < 0 || arg > CACHE_SMARTFETCH)
-                       return -EINVAL;
+                       break;
                dev->rcache = arg;
+               ret = 0;
                break;
        case BLKI2OSWSTRAT:
+               ret = -EINVAL;
                if (arg != 0
                    && (arg < CACHE_WRITETHROUGH || arg > CACHE_SMARTBACK))
-                       return -EINVAL;
+                       break;
                dev->wcache = arg;
+               ret = 0;
                break;
        }
-       return -ENOTTY;
+       unlock_kernel();
+
+       return ret;
 };
 
 /**
@@ -883,7 +898,7 @@ static void i2o_block_request_fn(struct request_queue *q)
                if (!req)
                        break;
 
-               if (blk_fs_request(req)) {
+               if (req->cmd_type == REQ_TYPE_FS) {
                        struct i2o_block_delayed_request *dreq;
                        struct i2o_block_request *ireq = req->special;
                        unsigned int queue_depth;
@@ -930,7 +945,8 @@ static const struct block_device_operations i2o_block_fops = {
        .owner = THIS_MODULE,
        .open = i2o_block_open,
        .release = i2o_block_release,
-       .locked_ioctl = i2o_block_ioctl,
+       .ioctl = i2o_block_ioctl,
+       .compat_ioctl = i2o_block_ioctl,
        .getgeo = i2o_block_getgeo,
        .media_changed = i2o_block_media_changed
 };
index cb9fbc8..8433cde 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 #include <linux/string_helpers.h>
 
@@ -107,6 +108,7 @@ static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
        struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
        int ret = -ENXIO;
 
+       lock_kernel();
        if (md) {
                if (md->usage == 2)
                        check_disk_change(bdev);
@@ -117,6 +119,7 @@ static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
                        ret = -EROFS;
                }
        }
+       unlock_kernel();
 
        return ret;
 }
@@ -125,7 +128,9 @@ static int mmc_blk_release(struct gendisk *disk, fmode_t mode)
 {
        struct mmc_blk_data *md = disk->private_data;
 
+       lock_kernel();
        mmc_blk_put(md);
+       unlock_kernel();
        return 0;
 }
 
index d6ded24..c77eb49 100644 (file)
@@ -32,7 +32,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
        /*
         * We only like normal block requests.
         */
-       if (!blk_fs_request(req)) {
+       if (req->cmd_type != REQ_TYPE_FS) {
                blk_dump_rq_flags(req, "MMC bad request");
                return BLKPREP_KILL;
        }
@@ -128,7 +128,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
        mq->req = NULL;
 
        blk_queue_prep_rq(mq->queue, mmc_prep_request);
-       blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
+       blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
index 1d2144d..62e6870 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/spinlock.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
@@ -87,14 +88,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
        buf = req->buffer;
 
-       if (!blk_fs_request(req))
+       if (req->cmd_type != REQ_TYPE_FS)
                return -EIO;
 
        if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
            get_capacity(req->rq_disk))
                return -EIO;
 
-       if (blk_discard_rq(req))
+       if (req->cmd_flags & REQ_DISCARD)
                return tr->discard(dev, block, nsect);
 
        switch(rq_data_dir(req)) {
@@ -178,8 +179,9 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode)
        int ret;
 
        if (!dev)
-               return -ERESTARTSYS;
+               return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/
 
+       lock_kernel();
        mutex_lock(&dev->lock);
 
        if (!dev->mtd) {
@@ -196,6 +198,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode)
 unlock:
        mutex_unlock(&dev->lock);
        blktrans_dev_put(dev);
+       unlock_kernel();
        return ret;
 }
 
@@ -207,6 +210,7 @@ static int blktrans_release(struct gendisk *disk, fmode_t mode)
        if (!dev)
                return ret;
 
+       lock_kernel();
        mutex_lock(&dev->lock);
 
        /* Release one reference, we sure its not the last one here*/
@@ -219,6 +223,7 @@ static int blktrans_release(struct gendisk *disk, fmode_t mode)
 unlock:
        mutex_unlock(&dev->lock);
        blktrans_dev_put(dev);
+       unlock_kernel();
        return ret;
 }
 
@@ -251,6 +256,7 @@ static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
        if (!dev)
                return ret;
 
+       lock_kernel();
        mutex_lock(&dev->lock);
 
        if (!dev->mtd)
@@ -265,6 +271,7 @@ static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
        }
 unlock:
        mutex_unlock(&dev->lock);
+       unlock_kernel();
        blktrans_dev_put(dev);
        return ret;
 }
@@ -273,7 +280,7 @@ static const struct block_device_operations mtd_blktrans_ops = {
        .owner          = THIS_MODULE,
        .open           = blktrans_open,
        .release        = blktrans_release,
-       .locked_ioctl   = blktrans_ioctl,
+       .ioctl          = blktrans_ioctl,
        .getgeo         = blktrans_getgeo,
 };
 
index 33975e9..1a84fae 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/hdreg.h>
 #include <linux/async.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 
 #include <asm/ccwdev.h>
 #include <asm/ebcdic.h>
@@ -2196,7 +2197,7 @@ static void dasd_setup_queue(struct dasd_block *block)
         */
        blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
        blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
-       blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN, NULL);
+       blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN);
 }
 
 /*
@@ -2235,6 +2236,7 @@ static int dasd_open(struct block_device *bdev, fmode_t mode)
        if (!block)
                return -ENODEV;
 
+       lock_kernel();
        base = block->base;
        atomic_inc(&block->open_count);
        if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) {
@@ -2269,12 +2271,14 @@ static int dasd_open(struct block_device *bdev, fmode_t mode)
                goto out;
        }
 
+       unlock_kernel();
        return 0;
 
 out:
        module_put(base->discipline->owner);
 unlock:
        atomic_dec(&block->open_count);
+       unlock_kernel();
        return rc;
 }
 
@@ -2282,8 +2286,10 @@ static int dasd_release(struct gendisk *disk, fmode_t mode)
 {
        struct dasd_block *block = disk->private_data;
 
+       lock_kernel();
        atomic_dec(&block->open_count);
        module_put(block->base->discipline->owner);
+       unlock_kernel();
        return 0;
 }
 
index 9b43ae9..2bd72aa 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/completion.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
@@ -775,6 +776,7 @@ dcssblk_open(struct block_device *bdev, fmode_t mode)
        struct dcssblk_dev_info *dev_info;
        int rc;
 
+       lock_kernel();
        dev_info = bdev->bd_disk->private_data;
        if (NULL == dev_info) {
                rc = -ENODEV;
@@ -784,6 +786,7 @@ dcssblk_open(struct block_device *bdev, fmode_t mode)
        bdev->bd_block_size = 4096;
        rc = 0;
 out:
+       unlock_kernel();
        return rc;
 }
 
@@ -794,6 +797,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
        struct segment_info *entry;
        int rc;
 
+       lock_kernel();
        if (!dev_info) {
                rc = -ENODEV;
                goto out;
@@ -811,6 +815,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
        up_write(&dcssblk_devices_sem);
        rc = 0;
 out:
+       unlock_kernel();
        return rc;
 }
 
index 097da8c..b7de025 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
@@ -361,6 +362,7 @@ tapeblock_open(struct block_device *bdev, fmode_t mode)
        struct tape_device *    device;
        int                     rc;
 
+       lock_kernel();
        device = tape_get_device(disk->private_data);
 
        if (device->required_tapemarks) {
@@ -384,12 +386,14 @@ tapeblock_open(struct block_device *bdev, fmode_t mode)
         *       is called.
         */
        tape_state_set(device, TS_BLKUSE);
+       unlock_kernel();
        return 0;
 
 release:
        tape_release(device);
  put_device:
        tape_put_device(device);
+       unlock_kernel();
        return rc;
 }
 
@@ -403,10 +407,12 @@ static int
 tapeblock_release(struct gendisk *disk, fmode_t mode)
 {
        struct tape_device *device = disk->private_data;
-
+       lock_kernel();
        tape_state_set(device, TS_IN_USE);
        tape_release(device);
        tape_put_device(device);
+       unlock_kernel();
 
        return 0;
 }
index 2a8cf13..4f785f2 100644 (file)
 #define SCSI_BUF_PA(address)   isa_virt_to_bus(address)
 #define SCSI_SG_PA(sgent)      (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset)
 
-static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
-                      struct scatterlist *sgp,
-                      int nseg,
-                      int badseg)
-{
-       printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
-              badseg, nseg, sg_virt(sgp),
-              (unsigned long long)SCSI_SG_PA(sgp),
-              sgp->length);
-
-       /*
-        * Not safe to continue.
-        */
-       panic("Buffer at physical address > 16Mb used for aha1542");
-}
-
 #include<linux/stat.h>
 
 #ifdef DEBUG
@@ -691,8 +675,6 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
                }
                scsi_for_each_sg(SCpnt, sg, sg_count, i) {
                        any2scsi(cptr[i].dataptr, SCSI_SG_PA(sg));
-                       if (SCSI_SG_PA(sg) + sg->length - 1 > ISA_DMA_THRESHOLD)
-                               BAD_SG_DMA(SCpnt, scsi_sglist(SCpnt), sg_count, i);
                        any2scsi(cptr[i].datalen, sg->length);
                };
                any2scsi(ccb[mbo].datalen, sg_count * sizeof(struct chain));
@@ -1133,16 +1115,9 @@ static int __init aha1542_detect(struct scsi_host_template * tpnt)
                                release_region(bases[indx], 4);
                                continue;
                        }
-                       /* For now we do this - until kmalloc is more intelligent
-                          we are resigned to stupid hacks like this */
-                       if (SCSI_BUF_PA(shpnt) >= ISA_DMA_THRESHOLD) {
-                               printk(KERN_ERR "Invalid address for shpnt with 1542.\n");
-                               goto unregister;
-                       }
                        if (!aha1542_test_port(bases[indx], shpnt))
                                goto unregister;
 
-
                        base_io = bases[indx];
 
                        /* Set the Bus on/off-times as not to ruin floppy performance */
index ee4b691..fda4de3 100644 (file)
@@ -716,7 +716,7 @@ static int _osd_req_list_objects(struct osd_request *or,
                return PTR_ERR(bio);
        }
 
-       bio->bi_rw &= ~(1 << BIO_RW);
+       bio->bi_rw &= ~REQ_WRITE;
        or->in.bio = bio;
        or->in.total_bytes = bio->bi_size;
        return 0;
@@ -814,7 +814,7 @@ void osd_req_write(struct osd_request *or,
 {
        _osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
        WARN_ON(or->out.bio || or->out.total_bytes);
-       WARN_ON(0 ==  bio_rw_flagged(bio, BIO_RW));
+       WARN_ON(0 == (bio->bi_rw & REQ_WRITE));
        or->out.bio = bio;
        or->out.total_bytes = len;
 }
@@ -829,7 +829,7 @@ int osd_req_write_kern(struct osd_request *or,
        if (IS_ERR(bio))
                return PTR_ERR(bio);
 
-       bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+       bio->bi_rw |= REQ_WRITE; /* FIXME: bio_set_dir() */
        osd_req_write(or, obj, offset, bio, len);
        return 0;
 }
@@ -865,7 +865,7 @@ void osd_req_read(struct osd_request *or,
 {
        _osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
        WARN_ON(or->in.bio || or->in.total_bytes);
-       WARN_ON(1 == bio_rw_flagged(bio, BIO_RW));
+       WARN_ON(bio->bi_rw & REQ_WRITE);
        or->in.bio = bio;
        or->in.total_bytes = len;
 }
index 2bf9846..bbbc186 100644 (file)
@@ -320,7 +320,7 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
                                    "changed. The Linux SCSI layer does not "
                                    "automatically adjust these parameters.\n");
 
-               if (blk_barrier_rq(scmd->request))
+               if (scmd->request->cmd_flags & REQ_HARDBARRIER)
                        /*
                         * barrier requests should always retry on UA
                         * otherwise block will get a spurious error
@@ -1331,16 +1331,16 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
        case DID_OK:
                break;
        case DID_BUS_BUSY:
-               return blk_failfast_transport(scmd->request);
+               return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
        case DID_PARITY:
-               return blk_failfast_dev(scmd->request);
+               return (scmd->request->cmd_flags & REQ_FAILFAST_DEV);
        case DID_ERROR:
                if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
                    status_byte(scmd->result) == RESERVATION_CONFLICT)
                        return 0;
                /* fall through */
        case DID_SOFT_ERROR:
-               return blk_failfast_driver(scmd->request);
+               return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
        }
 
        switch (status_byte(scmd->result)) {
@@ -1349,7 +1349,9 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
                 * assume caller has checked sense and determinted
                 * the check condition was retryable.
                 */
-               return blk_failfast_dev(scmd->request);
+               if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
+                   scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
+                       return 1;
        }
 
        return 0;
index 1646fe7..b8de389 100644 (file)
@@ -85,7 +85,7 @@ static void scsi_unprep_request(struct request *req)
 {
        struct scsi_cmnd *cmd = req->special;
 
-       req->cmd_flags &= ~REQ_DONTPREP;
+       blk_unprep_request(req);
        req->special = NULL;
 
        scsi_put_command(cmd);
@@ -722,7 +722,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                        sense_deferred = scsi_sense_is_deferred(&sshdr);
        }
 
-       if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
+       if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
                req->errors = result;
                if (result) {
                        if (sense_valid && req->sense) {
@@ -757,7 +757,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                }
        }
 
-       BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
+       /* no bidi support for !REQ_TYPE_BLOCK_PC yet */
+       BUG_ON(blk_bidi_rq(req));
 
        /*
         * Next deal with any sectors which we were able to correctly
@@ -1010,11 +1011,8 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 
 err_exit:
        scsi_release_buffers(cmd);
-       if (error == BLKPREP_KILL)
-               scsi_put_command(cmd);
-       else /* BLKPREP_DEFER */
-               scsi_unprep_request(cmd->request);
-
+       cmd->request->special = NULL;   /* touch cmd only before the put */
+       scsi_put_command(cmd);
        return error;
 }
 EXPORT_SYMBOL(scsi_init_io);
index cc8a1d1..8e2e893 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/delay.h>
+#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
 #include <linux/async.h>
@@ -411,54 +412,85 @@ static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif)
 }
 
 /**
- * sd_prepare_discard - unmap blocks on thinly provisioned device
+ * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device
+ * @sdp: scsi device to operate on
  * @rq: Request to prepare
  *
  * Will issue either UNMAP or WRITE SAME(16) depending on preference
  * indicated by target device.
  **/
-static int sd_prepare_discard(struct request *rq)
+static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
 {
        struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
        struct bio *bio = rq->bio;
        sector_t sector = bio->bi_sector;
-       unsigned int num = bio_sectors(bio);
+       unsigned int nr_sectors = bio_sectors(bio);
+       unsigned int len;
+       int ret;
+       struct page *page;
 
        if (sdkp->device->sector_size == 4096) {
                sector >>= 3;
-               num >>= 3;
+               nr_sectors >>= 3;
        }
 
-       rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->timeout = SD_TIMEOUT;
 
        memset(rq->cmd, 0, rq->cmd_len);
 
+       page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+       if (!page)
+               return BLKPREP_DEFER;
+
        if (sdkp->unmap) {
-               char *buf = kmap_atomic(bio_page(bio), KM_USER0);
+               char *buf = page_address(page);
 
+               rq->cmd_len = 10;
                rq->cmd[0] = UNMAP;
                rq->cmd[8] = 24;
-               rq->cmd_len = 10;
-
-               /* Ensure that data length matches payload */
-               rq->__data_len = bio->bi_size = bio->bi_io_vec->bv_len = 24;
 
                put_unaligned_be16(6 + 16, &buf[0]);
                put_unaligned_be16(16, &buf[2]);
                put_unaligned_be64(sector, &buf[8]);
-               put_unaligned_be32(num, &buf[16]);
+               put_unaligned_be32(nr_sectors, &buf[16]);
 
-               kunmap_atomic(buf, KM_USER0);
+               len = 24;
        } else {
+               rq->cmd_len = 16;
                rq->cmd[0] = WRITE_SAME_16;
                rq->cmd[1] = 0x8; /* UNMAP */
                put_unaligned_be64(sector, &rq->cmd[2]);
-               put_unaligned_be32(num, &rq->cmd[10]);
-               rq->cmd_len = 16;
+               put_unaligned_be32(nr_sectors, &rq->cmd[10]);
+
+               len = sdkp->device->sector_size;
        }
 
-       return BLKPREP_OK;
+       blk_add_request_payload(rq, page, len);
+       ret = scsi_setup_blk_pc_cmnd(sdp, rq);
+       rq->buffer = page_address(page);
+       if (ret != BLKPREP_OK) {
+               __free_page(page);
+               rq->buffer = NULL;
+       }
+       return ret;
+}
+
+static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
+{
+       rq->timeout = SD_TIMEOUT;
+       rq->retries = SD_MAX_RETRIES;
+       rq->cmd[0] = SYNCHRONIZE_CACHE;
+       rq->cmd_len = 10;
+
+       return scsi_setup_blk_pc_cmnd(sdp, rq);
+}
+
+static void sd_unprep_fn(struct request_queue *q, struct request *rq)
+{
+       if (rq->cmd_flags & REQ_DISCARD) {
+               free_page((unsigned long)rq->buffer);
+               rq->buffer = NULL;
+       }
 }
 
 /**
@@ -485,10 +517,13 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
         * Discard request come in as REQ_TYPE_FS but we turn them into
         * block PC requests to make life easier.
         */
-       if (blk_discard_rq(rq))
-               ret = sd_prepare_discard(rq);
-
-       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+       if (rq->cmd_flags & REQ_DISCARD) {
+               ret = scsi_setup_discard_cmnd(sdp, rq);
+               goto out;
+       } else if (rq->cmd_flags & REQ_FLUSH) {
+               ret = scsi_setup_flush_cmnd(sdp, rq);
+               goto out;
+       } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                ret = scsi_setup_blk_pc_cmnd(sdp, rq);
                goto out;
        } else if (rq->cmd_type != REQ_TYPE_FS) {
@@ -636,7 +671,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
                SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD;
                SCpnt->cmnd[7] = 0x18;
                SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
-               SCpnt->cmnd[10] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+               SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
 
                /* LBA */
                SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
@@ -661,7 +696,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
                SCpnt->cmnd[31] = (unsigned char) this_count & 0xff;
        } else if (block > 0xffffffff) {
                SCpnt->cmnd[0] += READ_16 - READ_6;
-               SCpnt->cmnd[1] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+               SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
                SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
                SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
                SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
@@ -682,7 +717,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
                        this_count = 0xffff;
 
                SCpnt->cmnd[0] += READ_10 - READ_6;
-               SCpnt->cmnd[1] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+               SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
                SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
                SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
                SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
@@ -691,7 +726,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
                SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
                SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
        } else {
-               if (unlikely(blk_fua_rq(rq))) {
+               if (unlikely(rq->cmd_flags & REQ_FUA)) {
                        /*
                         * This happens only if this drive failed
                         * 10byte rw command with ILLEGAL_REQUEST
@@ -745,6 +780,8 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
  *     or from within the kernel (e.g. as a result of a mount(1) ).
  *     In the latter case @inode and @filp carry an abridged amount
  *     of information as noted above.
+ *
+ *     Locking: called with bdev->bd_mutex held.
  **/
 static int sd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -799,7 +836,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
        if (!scsi_device_online(sdev))
                goto error_out;
 
-       if (!sdkp->openers++ && sdev->removable) {
+       if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) {
                if (scsi_block_when_processing_errors(sdev))
                        scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
        }
@@ -823,6 +860,8 @@ error_autopm:
  *
  *     Note: may block (uninterruptible) if error recovery is underway
  *     on this disk.
+ *
+ *     Locking: called with bdev->bd_mutex held.
  **/
 static int sd_release(struct gendisk *disk, fmode_t mode)
 {
@@ -831,7 +870,7 @@ static int sd_release(struct gendisk *disk, fmode_t mode)
 
        SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
 
-       if (!--sdkp->openers && sdev->removable) {
+       if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) {
                if (scsi_block_when_processing_errors(sdev))
                        scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
        }
@@ -904,7 +943,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
        error = scsi_nonblockable_ioctl(sdp, cmd, p,
                                        (mode & FMODE_NDELAY) != 0);
        if (!scsi_block_when_processing_errors(sdp) || !error)
-               return error;
+               goto out;
 
        /*
         * Send SCSI addressing ioctls directly to mid level, send other
@@ -914,13 +953,17 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
        switch (cmd) {
                case SCSI_IOCTL_GET_IDLUN:
                case SCSI_IOCTL_GET_BUS_NUMBER:
-                       return scsi_ioctl(sdp, cmd, p);
+                       error = scsi_ioctl(sdp, cmd, p);
+                       break;
                default:
                        error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
                        if (error != -ENOTTY)
-                               return error;
+                               break;
+                       error = scsi_ioctl(sdp, cmd, p);
+                       break;
        }
-       return scsi_ioctl(sdp, cmd, p);
+out:
+       return error;
 }
 
 static void set_media_not_present(struct scsi_disk *sdkp)
@@ -1045,15 +1088,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
        return 0;
 }
 
-static void sd_prepare_flush(struct request_queue *q, struct request *rq)
-{
-       rq->cmd_type = REQ_TYPE_BLOCK_PC;
-       rq->timeout = SD_TIMEOUT;
-       rq->retries = SD_MAX_RETRIES;
-       rq->cmd[0] = SYNCHRONIZE_CACHE;
-       rq->cmd_len = 10;
-}
-
 static void sd_rescan(struct device *dev)
 {
        struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
@@ -1103,7 +1137,7 @@ static const struct block_device_operations sd_fops = {
        .owner                  = THIS_MODULE,
        .open                   = sd_open,
        .release                = sd_release,
-       .locked_ioctl           = sd_ioctl,
+       .ioctl                  = sd_ioctl,
        .getgeo                 = sd_getgeo,
 #ifdef CONFIG_COMPAT
        .compat_ioctl           = sd_compat_ioctl,
@@ -1120,7 +1154,7 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
        u64 bad_lba;
        int info_valid;
 
-       if (!blk_fs_request(scmd->request))
+       if (scmd->request->cmd_type != REQ_TYPE_FS)
                return 0;
 
        info_valid = scsi_get_sense_info_fld(scmd->sense_buffer,
@@ -1171,6 +1205,12 @@ static int sd_done(struct scsi_cmnd *SCpnt)
        int sense_valid = 0;
        int sense_deferred = 0;
 
+       if (SCpnt->request->cmd_flags & REQ_DISCARD) {
+               if (!result)
+                       scsi_set_resid(SCpnt, 0);
+               return good_bytes;
+       }
+
        if (result) {
                sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
                if (sense_valid)
@@ -2121,7 +2161,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
        else
                ordered = QUEUE_ORDERED_DRAIN;
 
-       blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush);
+       blk_queue_ordered(sdkp->disk->queue, ordered);
 
        set_capacity(disk, sdkp->capacity);
        kfree(buffer);
@@ -2234,6 +2274,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
        sd_revalidate_disk(gd);
 
        blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
+       blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
 
        gd->driverfs_dev = &sdp->sdev_gendev;
        gd->flags = GENHD_FL_EXT_DEVT;
@@ -2313,7 +2354,7 @@ static int sd_probe(struct device *dev)
        sdkp->driver = &sd_template;
        sdkp->disk = gd;
        sdkp->index = index;
-       sdkp->openers = 0;
+       atomic_set(&sdkp->openers, 0);
        sdkp->previous_state = 1;
 
        if (!sdp->request_queue->rq_timeout) {
@@ -2372,6 +2413,7 @@ static int sd_remove(struct device *dev)
 
        async_synchronize_full();
        blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
+       blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
        device_del(&sdkp->dev);
        del_gendisk(sdkp->disk);
        sd_shutdown(dev);
index 43d3caf..f81a930 100644 (file)
@@ -47,7 +47,7 @@ struct scsi_disk {
        struct scsi_device *device;
        struct device   dev;
        struct gendisk  *disk;
-       unsigned int    openers;        /* protected by BKL for now, yuck */
+       atomic_t        openers;
        sector_t        capacity;       /* size in 512-byte sectors */
        u32             index;
        unsigned short  hw_sector_size;
index 0a90abc..ba9c3e0 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 
@@ -466,22 +467,27 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
-       struct scsi_cd *cd = scsi_cd_get(bdev->bd_disk);
+       struct scsi_cd *cd;
        int ret = -ENXIO;
 
+       lock_kernel();
+       cd = scsi_cd_get(bdev->bd_disk);
        if (cd) {
                ret = cdrom_open(&cd->cdi, bdev, mode);
                if (ret)
                        scsi_cd_put(cd);
        }
+       unlock_kernel();
        return ret;
 }
 
 static int sr_block_release(struct gendisk *disk, fmode_t mode)
 {
        struct scsi_cd *cd = scsi_cd(disk);
+       lock_kernel();
        cdrom_release(&cd->cdi, mode);
        scsi_cd_put(cd);
+       unlock_kernel();
        return 0;
 }
 
@@ -493,6 +499,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        void __user *argp = (void __user *)arg;
        int ret;
 
+       lock_kernel();
+
        /*
         * Send SCSI addressing ioctls directly to mid level, send other
         * ioctls to cdrom/block level.
@@ -500,12 +508,13 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        switch (cmd) {
        case SCSI_IOCTL_GET_IDLUN:
        case SCSI_IOCTL_GET_BUS_NUMBER:
-               return scsi_ioctl(sdev, cmd, argp);
+               ret = scsi_ioctl(sdev, cmd, argp);
+               goto out;
        }
 
        ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
        if (ret != -ENOSYS)
-               return ret;
+               goto out;
 
        /*
         * ENODEV means that we didn't recognise the ioctl, or that we
@@ -516,8 +525,12 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
        ret = scsi_nonblockable_ioctl(sdev, cmd, argp,
                                        (mode & FMODE_NDELAY) != 0);
        if (ret != -ENODEV)
-               return ret;
-       return scsi_ioctl(sdev, cmd, argp);
+               goto out;
+       ret = scsi_ioctl(sdev, cmd, argp);
+
+out:
+       unlock_kernel();
+       return ret;
 }
 
 static int sr_block_media_changed(struct gendisk *disk)
@@ -531,7 +544,7 @@ static const struct block_device_operations sr_bdops =
        .owner          = THIS_MODULE,
        .open           = sr_block_open,
        .release        = sr_block_release,
-       .locked_ioctl   = sr_block_ioctl,
+       .ioctl          = sr_block_ioctl,
        .media_changed  = sr_block_media_changed,
        /* 
         * No compat_ioctl for now because sr_block_ioctl never
index b5838d5..713620e 100644 (file)
@@ -2022,7 +2022,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
                if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
                                                  != cmd))
                {
-                       if(blk_fs_request(cmd->request)) {
+                       if (cmd->request->cmd_type == REQ_TYPE_FS) {
                                sun3scsi_dma_setup(d, count,
                                                   rq_data_dir(cmd->request));
                                sun3_dma_setup_done = cmd;
index e606cf0..613f588 100644 (file)
@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted,
                                                  struct scsi_cmnd *cmd,
                                                  int write_flag)
 {
-       if(blk_fs_request(cmd->request))
+       if (cmd->request->cmd_type == REQ_TYPE_FS)
                return wanted;
        else
                return 0;
index aaa4fd0..7c526b8 100644 (file)
@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted,
                                                  struct scsi_cmnd *cmd,
                                                  int write_flag)
 {
-       if(blk_fs_request(cmd->request))
+       if (cmd->request->cmd_type == REQ_TYPE_FS)
                return wanted;
        else
                return 0;
index f7ea2a3..ff1d247 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/major.h>
 #include <linux/delay.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -805,7 +806,8 @@ static void blkvsc_init_rw(struct blkvsc_request *blkvsc_req)
                        blkvsc_req->cmnd[0] = READ_16;
                }
 
-               blkvsc_req->cmnd[1] |= blk_fua_rq(blkvsc_req->req) ? 0x8 : 0;
+               blkvsc_req->cmnd[1] |=
+                       (blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;
 
                *(unsigned long long *)&blkvsc_req->cmnd[2] =
                                cpu_to_be64(blkvsc_req->sector_start);
@@ -821,7 +823,8 @@ static void blkvsc_init_rw(struct blkvsc_request *blkvsc_req)
                        blkvsc_req->cmnd[0] = READ_10;
                }
 
-               blkvsc_req->cmnd[1] |= blk_fua_rq(blkvsc_req->req) ? 0x8 : 0;
+               blkvsc_req->cmnd[1] |=
+                       (blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;
 
                *(unsigned int *)&blkvsc_req->cmnd[2] =
                                cpu_to_be32(blkvsc_req->sector_start);
@@ -1268,7 +1271,7 @@ static void blkvsc_request(struct request_queue *queue)
                DPRINT_DBG(BLKVSC_DRV, "- req %p\n", req);
 
                blkdev = req->rq_disk->private_data;
-               if (blkdev->shutting_down || !blk_fs_request(req) ||
+               if (blkdev->shutting_down || req->cmd_type != REQ_TYPE_FS ||
                    blkdev->media_not_present) {
                        __blk_end_request_cur(req, 0);
                        continue;
@@ -1306,6 +1309,7 @@ static int blkvsc_open(struct block_device *bdev, fmode_t mode)
        DPRINT_DBG(BLKVSC_DRV, "- users %d disk %s\n", blkdev->users,
                   blkdev->gd->disk_name);
 
+       lock_kernel();
        spin_lock(&blkdev->lock);
 
        if (!blkdev->users && blkdev->device_type == DVD_TYPE) {
@@ -1317,6 +1321,7 @@ static int blkvsc_open(struct block_device *bdev, fmode_t mode)
        blkdev->users++;
 
        spin_unlock(&blkdev->lock);
+       unlock_kernel();
        return 0;
 }
 
@@ -1327,6 +1332,7 @@ static int blkvsc_release(struct gendisk *disk, fmode_t mode)
        DPRINT_DBG(BLKVSC_DRV, "- users %d disk %s\n", blkdev->users,
                   blkdev->gd->disk_name);
 
+       lock_kernel();
        spin_lock(&blkdev->lock);
        if (blkdev->users == 1) {
                spin_unlock(&blkdev->lock);
@@ -1337,6 +1343,7 @@ static int blkvsc_release(struct gendisk *disk, fmode_t mode)
        blkdev->users--;
 
        spin_unlock(&blkdev->lock);
+       unlock_kernel();
        return 0;
 }
 
index 7b3e973..7e49527 100644 (file)
@@ -133,17 +133,12 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev,
 }
 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
 
+static void xenbus_switch_fatal(struct xenbus_device *, int, int,
+                               const char *, ...);
 
-/**
- * xenbus_switch_state
- * @dev: xenbus device
- * @state: new state
- *
- * Advertise in the store a change of the given driver to the given new_state.
- * Return 0 on success, or -errno on error.  On error, the device will switch
- * to XenbusStateClosing, and the error will be saved in the store.
- */
-int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+static int
+__xenbus_switch_state(struct xenbus_device *dev,
+                     enum xenbus_state state, int depth)
 {
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
@@ -152,35 +147,65 @@ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
           to it, as the device will be tearing down, and we don't want to
           resurrect that directory.
 
-          Note that, because of this cached value of our state, this function
-          will not work inside a Xenstore transaction (something it was
-          trying to in the past) because dev->state would not get reset if
-          the transaction was aborted.
-
+          Note that, because of this cached value of our state, this
+          function will not take a caller's Xenstore transaction
+          (something it was trying to do in the past) because dev->state
+          would not get reset if the transaction was aborted.
         */
 
+       struct xenbus_transaction xbt;
        int current_state;
-       int err;
+       int err, abort;
 
        if (state == dev->state)
                return 0;
 
-       err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
-                          &current_state);
-       if (err != 1)
+again:
+       abort = 1;
+
+       err = xenbus_transaction_start(&xbt);
+       if (err) {
+               xenbus_switch_fatal(dev, depth, err, "starting transaction");
                return 0;
+       }
+
+       err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
+       if (err != 1)
+               goto abort;
 
-       err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
+       err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
        if (err) {
-               if (state != XenbusStateClosing) /* Avoid looping */
-                       xenbus_dev_fatal(dev, err, "writing new state");
-               return err;
+               xenbus_switch_fatal(dev, depth, err, "writing new state");
+               goto abort;
        }
 
-       dev->state = state;
+       abort = 0;
+abort:
+       err = xenbus_transaction_end(xbt, abort);
+       if (err) {
+               if (err == -EAGAIN && !abort)
+                       goto again;
+               xenbus_switch_fatal(dev, depth, err, "ending transaction");
+       } else
+               dev->state = state;
 
        return 0;
 }
+
+/**
+ * xenbus_switch_state
+ * @dev: xenbus device
+ * @state: new state
+ *
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Always returns 0.  On a transaction or write error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+{
+       return __xenbus_switch_state(dev, state, 0);
+}
+
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
 int xenbus_frontend_closed(struct xenbus_device *dev)
@@ -283,6 +308,23 @@ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
 
+/**
+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
+ * avoid recursion within xenbus_switch_state.
+ */
+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
+                               const char *fmt, ...)
+{
+       va_list ap;
+
+       va_start(ap, fmt);
+       xenbus_va_dev_error(dev, err, fmt, ap);
+       va_end(ap);
+
+       if (!depth)
+               __xenbus_switch_state(dev, XenbusStateClosing, 1);
+}
+
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
index e7bf6ca..8abb2df 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
        if (!bio)
                goto out_bmd;
 
-       bio->bi_rw |= (!write_to_vm << BIO_RW);
+       if (!write_to_vm)
+               bio->bi_rw |= REQ_WRITE;
 
        ret = 0;
 
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
         * set data direction, and check if mapped pages need bouncing
         */
        if (!write_to_vm)
-               bio->bi_rw |= (1 << BIO_RW);
+               bio->bi_rw |= REQ_WRITE;
 
        bio->bi_bdev = bdev;
        bio->bi_flags |= (1 << BIO_USER_MAPPED);
index 451afbd..6641146 100644 (file)
@@ -1346,13 +1346,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                return ret;
        }
 
-       lock_kernel();
  restart:
 
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
        if (!disk)
-               goto out_unlock_kernel;
+               goto out;
 
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
@@ -1432,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        if (for_part)
                bdev->bd_part_count++;
        mutex_unlock(&bdev->bd_mutex);
-       unlock_kernel();
        return 0;
 
  out_clear:
@@ -1445,9 +1443,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        bdev->bd_contains = NULL;
  out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
-       unlock_kernel();
-
+ out:
        if (disk)
                module_put(disk->fops->owner);
        put_disk(disk);
@@ -1516,7 +1512,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
        struct block_device *victim = NULL;
 
        mutex_lock_nested(&bdev->bd_mutex, for_part);
-       lock_kernel();
        if (for_part)
                bdev->bd_part_count--;
 
@@ -1541,7 +1536,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                        victim = bdev->bd_contains;
                bdev->bd_contains = NULL;
        }
-       unlock_kernel();
        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
        if (victim)
index 34f7c37..64f1008 100644 (file)
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;
 
-       if (bio->bi_rw & (1 << BIO_RW)) {
+       if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 
        atomic_inc(&fs_info->nr_async_submits);
 
-       if (rw & (1 << BIO_RW_SYNCIO))
+       if (rw & REQ_SYNC)
                btrfs_set_work_high_prio(&async->work);
 
        btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                          bio, 1);
        BUG_ON(ret);
 
-       if (!(rw & (1 << BIO_RW))) {
+       if (!(rw & REQ_WRITE)) {
                /*
                 * called for a read, do the setup so that checksum validation
                 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
         * ram and up to date before trying to verify things.  For
         * blocksize <= pagesize, it is basically a noop
         */
-       if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+       if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
            !bio_ready_for_csum(bio)) {
                btrfs_queue_worker(&fs_info->endio_meta_workers,
                                   &end_io_wq->work);
index 8976c33..c038644 100644 (file)
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
 
-       if (!(rw & (1 << BIO_RW))) {
+       if (!(rw & REQ_WRITE)) {
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                                                    mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
        bio->bi_size = 0;
 
        bio_add_page(bio, page, failrec->len, start - page_offset(page));
-       if (failed_bio->bi_rw & (1 << BIO_RW))
+       if (failed_bio->bi_rw & REQ_WRITE)
                rw = WRITE;
        else
                rw = READ;
@@ -5647,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        struct bio_vec *bvec = bio->bi_io_vec;
        u64 start;
        int skip_sum;
-       int write = rw & (1 << BIO_RW);
+       int write = rw & REQ_WRITE;
        int ret = 0;
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
index d6e3af8..dd318ff 100644 (file)
@@ -258,7 +258,7 @@ loop_lock:
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-               if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+               if (cur->bi_rw & REQ_SYNC)
                        num_sync_run++;
 
                submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int max_errors = 0;
        struct btrfs_multi_bio *multi = NULL;
 
-       if (multi_ret && !(rw & (1 << BIO_RW)))
+       if (multi_ret && !(rw & REQ_WRITE))
                stripes_allocated = 1;
 again:
        if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
                mirror_num = 0;
 
        /* if our multi bio struct is too small, back off and try again */
-       if (rw & (1 << BIO_RW)) {
+       if (rw & REQ_WRITE) {
                if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
                                 BTRFS_BLOCK_GROUP_DUP)) {
                        stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && (rw & (1 << BIO_RW)) &&
+       if (multi_ret && (rw & REQ_WRITE) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
        num_stripes = 1;
        stripe_index = 0;
        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (unplug_page || (rw & (1 << BIO_RW)))
+               if (unplug_page || (rw & REQ_WRITE))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (rw & (1 << BIO_RW))
+               if (rw & REQ_WRITE)
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
                stripe_index = do_div(stripe_nr, factor);
                stripe_index *= map->sub_stripes;
 
-               if (unplug_page || (rw & (1 << BIO_RW)))
+               if (unplug_page || (rw & REQ_WRITE))
                        num_stripes = map->sub_stripes;
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        struct btrfs_pending_bios *pending_bios;
 
        /* don't bother with additional async steps for reads, right now */
-       if (!(rw & (1 << BIO_RW))) {
+       if (!(rw & REQ_WRITE)) {
                bio_get(bio);
                submit_bio(rw, bio);
                bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
 
        spin_lock(&device->io_lock);
-       if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+       if (bio->bi_rw & REQ_SYNC)
                pending_bios = &device->pending_sync_bios;
        else
                pending_bios = &device->pending_bios;
index 66b9cf7..de89645 100644 (file)
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
                nbytes = req->uc_outSize; /* don't have more space! */
        }
         if (copy_from_user(req->uc_data, buf, nbytes)) {
-               req->uc_flags |= REQ_ABORT;
+               req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
                retval = -EFAULT;
                goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
                retval = -EFAULT;
         
        /* If request was not a signal, enqueue and don't free */
-       if (!(req->uc_flags & REQ_ASYNC)) {
-               req->uc_flags |= REQ_READ;
+       if (!(req->uc_flags & CODA_REQ_ASYNC)) {
+               req->uc_flags |= CODA_REQ_READ;
                list_add_tail(&(req->uc_chain), &vcp->vc_processing);
                goto out;
        }
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
                list_del(&req->uc_chain);
 
                /* Async requests need to be freed here */
-               if (req->uc_flags & REQ_ASYNC) {
+               if (req->uc_flags & CODA_REQ_ASYNC) {
                        CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
                        kfree(req);
                        continue;
                }
-               req->uc_flags |= REQ_ABORT;
+               req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
        }
 
        list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
                list_del(&req->uc_chain);
 
-               req->uc_flags |= REQ_ABORT;
+               req->uc_flags |= CODA_REQ_ABORT;
                wake_up(&req->uc_sleep);
        }
 
index f09c5ed..b8893ab 100644 (file)
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
                               (((r)->uc_opcode != CODA_CLOSE && \
                                 (r)->uc_opcode != CODA_STORE && \
                                 (r)->uc_opcode != CODA_RELEASE) || \
-                               (r)->uc_flags & REQ_READ))
+                               (r)->uc_flags & CODA_REQ_READ))
 
 static inline void coda_waitfor_upcall(struct upc_req *req)
 {
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
                        set_current_state(TASK_UNINTERRUPTIBLE);
 
                /* got a reply */
-               if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+               if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
                        break;
 
                if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
        coda_waitfor_upcall(req);
 
        /* Op went through, interrupt or not... */
-       if (req->uc_flags & REQ_WRITE) {
+       if (req->uc_flags & CODA_REQ_WRITE) {
                out = (union outputArgs *)req->uc_data;
                /* here we map positive Venus errors to kernel errors */
                error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
        }
 
        error = -EINTR;
-       if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+       if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
                printk(KERN_WARNING "coda: Unexpected interruption.\n");
                goto exit;
        }
 
        /* Interrupted before venus read it. */
-       if (!(req->uc_flags & REQ_READ))
+       if (!(req->uc_flags & CODA_REQ_READ))
                goto exit;
 
        /* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
        sig_inputArgs->ih.opcode = CODA_SIGNAL;
        sig_inputArgs->ih.unique = req->uc_unique;
 
-       sig_req->uc_flags = REQ_ASYNC;
+       sig_req->uc_flags = CODA_REQ_ASYNC;
        sig_req->uc_opcode = sig_inputArgs->ih.opcode;
        sig_req->uc_unique = sig_inputArgs->ih.unique;
        sig_req->uc_inSize = sizeof(struct coda_in_hdr);
index 4337cad..e273220 100644 (file)
@@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
                        } else {
                                bio = master_dev->bio;
                                /* FIXME: bio_set_dir() */
-                               bio->bi_rw |= (1 << BIO_RW);
+                               bio->bi_rw |= REQ_WRITE;
                        }
 
                        osd_req_write(or, &ios->obj, per_dev->offset, bio,
index b7c7586..2f76c4a 100644 (file)
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)    ((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
        struct completion *done;        /* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)    ((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported through /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
                struct wb_writeback_work *work)
 {
-       spin_lock(&bdi->wb_lock);
-       list_add_tail(&work->list, &bdi->work_list);
-       spin_unlock(&bdi->wb_lock);
+       trace_writeback_queue(bdi, work);
 
-       /*
-        * If the default thread isn't there, make sure we add it. When
-        * it gets created and wakes up, we'll run this work.
-        */
-       if (unlikely(list_empty_careful(&bdi->wb_list)))
+       spin_lock_bh(&bdi->wb_lock);
+       list_add_tail(&work->list, &bdi->work_list);
+       if (bdi->wb.task) {
+               wake_up_process(bdi->wb.task);
+       } else {
+               /*
+                * The bdi thread isn't there, wake up the forker thread which
+                * will create and run it.
+                */
+               trace_writeback_nothread(bdi, work);
                wake_up_process(default_backing_dev_info.wb.task);
-       else {
-               struct bdi_writeback *wb = &bdi->wb;
-
-               if (wb->task)
-                       wake_up_process(wb->task);
        }
+       spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
         */
        work = kzalloc(sizeof(*work), GFP_ATOMIC);
        if (!work) {
-               if (bdi->wb.task)
+               if (bdi->wb.task) {
+                       trace_writeback_nowork(bdi);
                        wake_up_process(bdi->wb.task);
+               }
                return;
        }
 
@@ -643,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.more_io = 0;
                wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                wbc.pages_skipped = 0;
+
+               trace_wbc_writeback_start(&wbc, wb->bdi);
                if (work->sb)
                        __writeback_inodes_sb(work->sb, wb, &wbc);
                else
                        writeback_inodes_wb(wb, &wbc);
+               trace_wbc_writeback_written(&wbc, wb->bdi);
+
                work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -674,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (!list_empty(&wb->b_more_io))  {
                        inode = list_entry(wb->b_more_io.prev,
                                                struct inode, i_list);
+                       trace_wbc_writeback_wait(&wbc, wb->bdi);
                        inode_wait_for_writeback(inode);
                }
                spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb,
  * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
        struct wb_writeback_work *work = NULL;
 
-       spin_lock(&bdi->wb_lock);
+       spin_lock_bh(&bdi->wb_lock);
        if (!list_empty(&bdi->work_list)) {
                work = list_entry(bdi->work_list.next,
                                  struct wb_writeback_work, list);
                list_del_init(&work->list);
        }
-       spin_unlock(&bdi->wb_lock);
+       spin_unlock_bh(&bdi->wb_lock);
        return work;
 }
 
@@ -744,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
        struct wb_writeback_work *work;
        long wrote = 0;
 
-       while ((work = get_next_work_item(bdi, wb)) != NULL) {
+       while ((work = get_next_work_item(bdi)) != NULL) {
                /*
                 * Override sync mode, in case we must wait for completion
                 * because this thread is exiting now.
@@ -752,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                if (force_wait)
                        work->sync_mode = WB_SYNC_ALL;
 
+               trace_writeback_exec(bdi, work);
+
                wrote += wb_writeback(wb, work);
 
                /*
@@ -776,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
-       unsigned long last_active = jiffies;
-       unsigned long wait_jiffies = -1UL;
+       struct bdi_writeback *wb = data;
+       struct backing_dev_info *bdi = wb->bdi;
        long pages_written;
 
+       current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+       set_freezable();
+       wb->last_active = jiffies;
+
+       /*
+        * Our parent may run at a different priority, just set us to normal
+        */
+       set_user_nice(current, 0);
+
+       trace_writeback_thread_start(bdi);
+
        while (!kthread_should_stop()) {
+               /*
+                * Remove own delayed wake-up timer, since we are already awake
+                * and we'll take care of the periodic write-back.
+                */
+               del_timer(&wb->wakeup_timer);
+
                pages_written = wb_do_writeback(wb, 0);
 
+               trace_writeback_pages_written(pages_written);
+
                if (pages_written)
-                       last_active = jiffies;
-               else if (wait_jiffies != -1UL) {
-                       unsigned long max_idle;
+                       wb->last_active = jiffies;
 
-                       /*
-                        * Longest period of inactivity that we tolerate. If we
-                        * see dirty data again later, the task will get
-                        * recreated automatically.
-                        */
-                       max_idle = max(5UL * 60 * HZ, wait_jiffies);
-                       if (time_after(jiffies, max_idle + last_active))
-                               break;
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (!list_empty(&bdi->work_list)) {
+                       __set_current_state(TASK_RUNNING);
+                       continue;
                }
 
-               if (dirty_writeback_interval) {
-                       wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-                       schedule_timeout_interruptible(wait_jiffies);
-               } else {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       if (list_empty_careful(&wb->bdi->work_list) &&
-                           !kthread_should_stop())
-                               schedule();
-                       __set_current_state(TASK_RUNNING);
+               if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+                       schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+               else {
+                       /*
+                        * We have nothing to do, so we can go to sleep without
+                        * any timeout and save power. When work is queued or
+                        * something is made dirty, we will be woken up.
+                        */
+                       schedule();
                }
 
                try_to_freeze();
        }
 
+       /* Flush any work that raced with us exiting */
+       if (!list_empty(&bdi->work_list))
+               wb_do_writeback(wb, 1);
+
+       trace_writeback_thread_stop(bdi);
        return 0;
 }
 
+
 /*
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
@@ -891,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
        struct super_block *sb = inode->i_sb;
+       struct backing_dev_info *bdi = NULL;
+       bool wakeup_bdi = false;
 
        /*
         * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -944,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                 * reposition it (that would break b_dirty time-ordering).
                 */
                if (!was_dirty) {
-                       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-                       struct backing_dev_info *bdi = wb->bdi;
-
-                       if (bdi_cap_writeback_dirty(bdi) &&
-                           !test_bit(BDI_registered, &bdi->state)) {
-                               WARN_ON(1);
-                               printk(KERN_ERR "bdi-%s not registered\n",
-                                                               bdi->name);
+                       bdi = inode_to_bdi(inode);
+
+                       if (bdi_cap_writeback_dirty(bdi)) {
+                               WARN(!test_bit(BDI_registered, &bdi->state),
+                                    "bdi-%s not registered\n", bdi->name);
+
+                               /*
+                                * If this is the first dirty inode for this
+                                * bdi, we have to wake-up the corresponding
+                                * bdi thread to make sure background
+                                * write-back happens later.
+                                */
+                               if (!wb_has_dirty_io(&bdi->wb))
+                                       wakeup_bdi = true;
                        }
 
                        inode->dirtied_when = jiffies;
-                       list_move(&inode->i_list, &wb->b_dirty);
+                       list_move(&inode->i_list, &bdi->wb.b_dirty);
                }
        }
 out:
        spin_unlock(&inode_lock);
+
+       if (wakeup_bdi)
+               bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
index 6a857e2..cde1248 100644 (file)
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
                goto skip_barrier;
        get_bh(bh);
-       submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
+       submit_bh(WRITE_BARRIER | REQ_META, bh);
        wait_on_buffer(bh);
        if (buffer_eopnotsupp(bh)) {
                clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
                lock_buffer(bh);
 skip_barrier:
                get_bh(bh);
-               submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+               submit_bh(WRITE_SYNC | REQ_META, bh);
                wait_on_buffer(bh);
        }
        if (!buffer_uptodate(bh))
index 18176d0..f3b071f 100644 (file)
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
        struct buffer_head *bh, *head;
        int nr_underway = 0;
-       int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
-                       WRITE_SYNC_PLUG : WRITE));
+       int write_op = REQ_META |
+               (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
 
        BUG_ON(!PageLocked(page));
        BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        }
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
-       submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+       submit_bh(READ_SYNC | REQ_META, bh);
        if (!(flags & DIO_WAIT))
                return 0;
 
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        if (buffer_uptodate(first_bh))
                goto out;
        if (!buffer_locked(first_bh))
-               ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
+               ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
 
        dblock++;
        extlen--;
index 4f44bde..4d4b1e8 100644 (file)
@@ -274,7 +274,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 
        bio->bi_end_io = end_bio_io_page;
        bio->bi_private = page;
-       submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+       submit_bio(READ_SYNC | REQ_META, bio);
        wait_on_page_locked(page);
        bio_put(bio);
        if (!PageUptodate(page)) {
index 2e6a272..4588fb9 100644 (file)
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
                 * Last BIO is always sent through the following
                 * submission.
                 */
-               rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+               rw |= REQ_SYNC | REQ_UNPLUG;
                res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
        }
 
index efdbfec..8f1dfae 100644 (file)
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                 * If the page isn't uptodate, we may need to start io on it
                 */
                if (!PageUptodate(page)) {
-                       /*
-                        * If in nonblock mode then dont block on waiting
-                        * for an in-flight io page
-                        */
-                       if (flags & SPLICE_F_NONBLOCK) {
-                               if (!trylock_page(page)) {
-                                       error = -EAGAIN;
-                                       break;
-                               }
-                       } else
-                               lock_page(page);
+                       lock_page(page);
 
                        /*
                         * Page was truncated, or invalidated by the
@@ -597,7 +587,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        struct page *pages[PIPE_DEF_BUFFERS];
        struct partial_page partial[PIPE_DEF_BUFFERS];
        struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
-       pgoff_t index;
        ssize_t res;
        size_t this_len;
        int error;
@@ -621,7 +610,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
                        goto shrink_ret;
        }
 
-       index = *ppos >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
        nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
index 2547daf..9d65d4d 100644 (file)
@@ -39,6 +39,7 @@ header-y += ax25.h
 header-y += b1lli.h
 header-y += baycom.h
 header-y += bfs_fs.h
+header-y += blk_types.h
 header-y += blkpg.h
 header-y += bpqether.h
 header-y += bsg.h
index f391d45..e24afab 100644 (file)
@@ -544,7 +544,7 @@ extern int audit_signals;
 #define audit_putname(n) do { ; } while (0)
 #define __audit_inode(n,d) do { ; } while (0)
 #define __audit_inode_child(i,p) do { ; } while (0)
-#define audit_inode(n,d) do { ; } while (0)
+#define audit_inode(n,d) do { (void)(d); } while (0)
 #define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
index e9aec0d..7628219 100644 (file)
@@ -45,22 +45,21 @@ enum bdi_stat_item {
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
 struct bdi_writeback {
-       struct list_head list;                  /* hangs off the bdi */
-
-       struct backing_dev_info *bdi;           /* our parent bdi */
+       struct backing_dev_info *bdi;   /* our parent bdi */
        unsigned int nr;
 
-       unsigned long last_old_flush;           /* last old data flush */
+       unsigned long last_old_flush;   /* last old data flush */
+       unsigned long last_active;      /* last time bdi thread was active */
 
-       struct task_struct      *task;          /* writeback task */
-       struct list_head        b_dirty;        /* dirty inodes */
-       struct list_head        b_io;           /* parked for writeback */
-       struct list_head        b_more_io;      /* parked for more writeback */
+       struct task_struct *task;       /* writeback thread */
+       struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+       struct list_head b_dirty;       /* dirty inodes */
+       struct list_head b_io;          /* parked for writeback */
+       struct list_head b_more_io;     /* parked for more writeback */
 };
 
 struct backing_dev_info {
        struct list_head bdi_list;
-       struct rcu_head rcu_head;
        unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
        unsigned long state;    /* Always use atomic bitops on this */
        unsigned int capabilities; /* Device capabilities */
@@ -80,8 +79,7 @@ struct backing_dev_info {
        unsigned int max_ratio, max_prop_frac;
 
        struct bdi_writeback wb;  /* default writeback info for this bdi */
-       spinlock_t wb_lock;       /* protects update side of wb_list */
-       struct list_head wb_list; /* the flusher threads hanging off this bdi */
+       spinlock_t wb_lock;       /* protects work_list */
 
        struct list_head work_list;
 
@@ -105,9 +103,10 @@ void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_task(struct bdi_writeback *wb);
+int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_arm_supers_timer(void);
+void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
index 7fc5606..5274103 100644 (file)
@@ -9,7 +9,7 @@
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
-
+ *
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
@@ -28,6 +28,9 @@
 
 #include <asm/io.h>
 
+/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
+#include <linux/blk_types.h>
+
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
 #define BIO_MAX_SIZE           (BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
 #define BIO_MAX_SECTORS                (BIO_MAX_SIZE >> 9)
 
-/*
- * was unsigned short, but we might as well be ready for > 64kB I/O pages
- */
-struct bio_vec {
-       struct page     *bv_page;
-       unsigned int    bv_len;
-       unsigned int    bv_offset;
-};
-
-struct bio_set;
-struct bio;
-struct bio_integrity_payload;
-typedef void (bio_end_io_t) (struct bio *, int);
-typedef void (bio_destructor_t) (struct bio *);
-
-/*
- * main unit of I/O for the block layer and lower layers (ie drivers and
- * stacking drivers)
- */
-struct bio {
-       sector_t                bi_sector;      /* device address in 512 byte
-                                                  sectors */
-       struct bio              *bi_next;       /* request queue link */
-       struct block_device     *bi_bdev;
-       unsigned long           bi_flags;       /* status, command, etc */
-       unsigned long           bi_rw;          /* bottom bits READ/WRITE,
-                                                * top bits priority
-                                                */
-
-       unsigned short          bi_vcnt;        /* how many bio_vec's */
-       unsigned short          bi_idx;         /* current index into bvl_vec */
-
-       /* Number of segments in this BIO after
-        * physical address coalescing is performed.
-        */
-       unsigned int            bi_phys_segments;
-
-       unsigned int            bi_size;        /* residual I/O count */
-
-       /*
-        * To keep track of the max segment size, we account for the
-        * sizes of the first and last mergeable segments in this bio.
-        */
-       unsigned int            bi_seg_front_size;
-       unsigned int            bi_seg_back_size;
-
-       unsigned int            bi_max_vecs;    /* max bvl_vecs we can hold */
-
-       unsigned int            bi_comp_cpu;    /* completion CPU */
-
-       atomic_t                bi_cnt;         /* pin count */
-
-       struct bio_vec          *bi_io_vec;     /* the actual vec list */
-
-       bio_end_io_t            *bi_end_io;
-
-       void                    *bi_private;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-       struct bio_integrity_payload *bi_integrity;  /* data integrity */
-#endif
-
-       bio_destructor_t        *bi_destructor; /* destructor */
-
-       /*
-        * We can inline a number of vecs at the end of the bio, to avoid
-        * double allocations for a small number of bio_vecs. This member
-        * MUST obviously be kept at the very end of the bio.
-        */
-       struct bio_vec          bi_inline_vecs[0];
-};
-
-/*
- * bio flags
- */
-#define BIO_UPTODATE   0       /* ok after I/O completion */
-#define BIO_RW_BLOCK   1       /* RW_AHEAD set, and read/write would block */
-#define BIO_EOF                2       /* out-out-bounds error */
-#define BIO_SEG_VALID  3       /* bi_phys_segments valid */
-#define BIO_CLONED     4       /* doesn't own data */
-#define BIO_BOUNCED    5       /* bio is a bounce bio */
-#define BIO_USER_MAPPED 6      /* contains user pages */
-#define BIO_EOPNOTSUPP 7       /* not supported */
-#define BIO_CPU_AFFINE 8       /* complete bio on same CPU as submitted */
-#define BIO_NULL_MAPPED 9      /* contains invalid user pages */
-#define BIO_FS_INTEGRITY 10    /* fs owns integrity data, not block layer */
-#define BIO_QUIET      11      /* Make BIO Quiet */
-#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
-
-/*
- * top 4 bits of bio flags indicate the pool this bio came from
- */
-#define BIO_POOL_BITS          (4)
-#define BIO_POOL_NONE          ((1UL << BIO_POOL_BITS) - 1)
-#define BIO_POOL_OFFSET                (BITS_PER_LONG - BIO_POOL_BITS)
-#define BIO_POOL_MASK          (1UL << BIO_POOL_OFFSET)
-#define BIO_POOL_IDX(bio)      ((bio)->bi_flags >> BIO_POOL_OFFSET)    
-
-/*
- * bio bi_rw flags
- *
- * bit 0 -- data direction
- *     If not set, bio is a read from device. If set, it's a write to device.
- * bit 1 -- fail fast device errors
- * bit 2 -- fail fast transport errors
- * bit 3 -- fail fast driver errors
- * bit 4 -- rw-ahead when set
- * bit 5 -- barrier
- *     Insert a serialization point in the IO queue, forcing previously
- *     submitted IO to be completed before this one is issued.
- * bit 6 -- synchronous I/O hint.
- * bit 7 -- Unplug the device immediately after submitting this bio.
- * bit 8 -- metadata request
- *     Used for tracing to differentiate metadata and data IO. May also
- *     get some preferential treatment in the IO scheduler
- * bit 9 -- discard sectors
- *     Informs the lower level device that this range of sectors is no longer
- *     used by the file system and may thus be freed by the device. Used
- *     for flash based storage.
- *     Don't want driver retries for any fast fail whatever the reason.
- * bit 10 -- Tell the IO scheduler not to wait for more requests after this
-       one has been submitted, even if it is a SYNC request.
- */
-enum bio_rw_flags {
-       BIO_RW,
-       BIO_RW_FAILFAST_DEV,
-       BIO_RW_FAILFAST_TRANSPORT,
-       BIO_RW_FAILFAST_DRIVER,
-       /* above flags must match REQ_* */
-       BIO_RW_AHEAD,
-       BIO_RW_BARRIER,
-       BIO_RW_SYNCIO,
-       BIO_RW_UNPLUG,
-       BIO_RW_META,
-       BIO_RW_DISCARD,
-       BIO_RW_NOIDLE,
-};
-
-/*
- * First four bits must match between bio->bi_rw and rq->cmd_flags, make
- * that explicit here.
- */
-#define BIO_RW_RQ_MASK         0xf
-
-static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
-{
-       return (bio->bi_rw & (1 << flag)) != 0;
-}
-
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
  */
@@ -211,7 +66,10 @@ static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
 #define bio_offset(bio)                bio_iovec((bio))->bv_offset
 #define bio_segments(bio)      ((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)       ((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD))
+#define bio_empty_barrier(bio) \
+       (((bio)->bi_rw & REQ_HARDBARRIER) && \
+        !bio_has_data(bio) && \
+        !((bio)->bi_rw & REQ_DISCARD))
 
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
new file mode 100644 (file)
index 0000000..5369177
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Block data types and constants.  Directly include this file only to
+ * break include dependency loop.
+ */
+#ifndef __LINUX_BLK_TYPES_H
+#define __LINUX_BLK_TYPES_H
+
+#ifdef CONFIG_BLOCK
+
+#include <linux/types.h>
+
+struct bio_set;
+struct bio;
+struct bio_integrity_payload;
+struct page;
+struct block_device;
+typedef void (bio_end_io_t) (struct bio *, int);
+typedef void (bio_destructor_t) (struct bio *);
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+       struct page     *bv_page;
+       unsigned int    bv_len;
+       unsigned int    bv_offset;
+};
+
+/*
+ * main unit of I/O for the block layer and lower layers (ie drivers and
+ * stacking drivers)
+ */
+struct bio {
+       sector_t                bi_sector;      /* device address in 512 byte
+                                                  sectors */
+       struct bio              *bi_next;       /* request queue link */
+       struct block_device     *bi_bdev;
+       unsigned long           bi_flags;       /* status, command, etc */
+       unsigned long           bi_rw;          /* bottom bits READ/WRITE,
+                                                * top bits priority
+                                                */
+
+       unsigned short          bi_vcnt;        /* how many bio_vec's */
+       unsigned short          bi_idx;         /* current index into bvl_vec */
+
+       /* Number of segments in this BIO after
+        * physical address coalescing is performed.
+        */
+       unsigned int            bi_phys_segments;
+
+       unsigned int            bi_size;        /* residual I/O count */
+
+       /*
+        * To keep track of the max segment size, we account for the
+        * sizes of the first and last mergeable segments in this bio.
+        */
+       unsigned int            bi_seg_front_size;
+       unsigned int            bi_seg_back_size;
+
+       unsigned int            bi_max_vecs;    /* max bvl_vecs we can hold */
+
+       unsigned int            bi_comp_cpu;    /* completion CPU */
+
+       atomic_t                bi_cnt;         /* pin count */
+
+       struct bio_vec          *bi_io_vec;     /* the actual vec list */
+
+       bio_end_io_t            *bi_end_io;
+
+       void                    *bi_private;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+       struct bio_integrity_payload *bi_integrity;  /* data integrity */
+#endif
+
+       bio_destructor_t        *bi_destructor; /* destructor */
+
+       /*
+        * We can inline a number of vecs at the end of the bio, to avoid
+        * double allocations for a small number of bio_vecs. This member
+        * MUST obviously be kept at the very end of the bio.
+        */
+       struct bio_vec          bi_inline_vecs[0];
+};
+
+/*
+ * bio flags: bit numbers within bi_flags, tested with bio_flagged()
+ */
+#define BIO_UPTODATE   0       /* ok after I/O completion */
+#define BIO_RW_BLOCK   1       /* RW_AHEAD set, and read/write would block */
+#define BIO_EOF                2       /* out-of-bounds error */
+#define BIO_SEG_VALID  3       /* bi_phys_segments valid */
+#define BIO_CLONED     4       /* doesn't own data */
+#define BIO_BOUNCED    5       /* bio is a bounce bio */
+#define BIO_USER_MAPPED 6      /* contains user pages */
+#define BIO_EOPNOTSUPP 7       /* not supported */
+#define BIO_CPU_AFFINE 8       /* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9      /* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10    /* fs owns integrity data, not block layer */
+#define BIO_QUIET      11      /* Make BIO Quiet */
+#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
+
+/*
+ * top 4 bits of bio flags indicate the pool this bio came from
+ */
+#define BIO_POOL_BITS          (4)
+#define BIO_POOL_NONE          ((1UL << BIO_POOL_BITS) - 1)
+#define BIO_POOL_OFFSET                (BITS_PER_LONG - BIO_POOL_BITS)
+#define BIO_POOL_MASK          (1UL << BIO_POOL_OFFSET)
+#define BIO_POOL_IDX(bio)      ((bio)->bi_flags >> BIO_POOL_OFFSET)
+
+#endif /* CONFIG_BLOCK */
+
+/*
+ * Request flag bit numbers (REQ_x == 1 << __REQ_x), used in cmd_flags of
+ * struct request and bi_rw of struct bio.  Some flags are valid in only one.
+ */
+enum rq_flag_bits {
+       /* common flags */
+       __REQ_WRITE,            /* not set, read. set, write */
+       __REQ_FAILFAST_DEV,     /* no driver retries of device errors */
+       __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
+       __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
+
+       __REQ_HARDBARRIER,      /* may not be passed by drive either */
+       __REQ_SYNC,             /* request is sync (sync write or read) */
+       __REQ_META,             /* metadata io request */
+       __REQ_DISCARD,          /* request to discard sectors */
+       __REQ_NOIDLE,           /* don't anticipate more IO after this one */
+
+       /* bio only flags */
+       __REQ_UNPLUG,           /* unplug the device immediately after submission */
+       __REQ_RAHEAD,           /* read ahead, can fail anytime */
+
+       /* request only flags */
+       __REQ_SORTED,           /* elevator knows about this request */
+       __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
+       __REQ_FUA,              /* forced unit access */
+       __REQ_NOMERGE,          /* don't touch this for merging */
+       __REQ_STARTED,          /* drive already may have started this one */
+       __REQ_DONTPREP,         /* don't call prep for this one */
+       __REQ_QUEUED,           /* uses queueing */
+       __REQ_ELVPRIV,          /* elevator private data attached */
+       __REQ_FAILED,           /* set if the request failed */
+       __REQ_QUIET,            /* don't worry about errors */
+       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
+       __REQ_ORDERED_COLOR,    /* is before or after barrier */
+       __REQ_ALLOCED,          /* request came from our alloc pool */
+       __REQ_COPY_USER,        /* contains copies of user pages */
+       __REQ_INTEGRITY,        /* integrity metadata has been remapped */
+       __REQ_FLUSH,            /* request for cache flush */
+       __REQ_IO_STAT,          /* account I/O stat */
+       __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
+       __REQ_NR_BITS,          /* stops here */
+};
+
+#define REQ_WRITE              (1 << __REQ_WRITE)
+#define REQ_FAILFAST_DEV       (1 << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER    (1 << __REQ_FAILFAST_DRIVER)
+#define REQ_HARDBARRIER                (1 << __REQ_HARDBARRIER)
+#define REQ_SYNC               (1 << __REQ_SYNC)
+#define REQ_META               (1 << __REQ_META)
+#define REQ_DISCARD            (1 << __REQ_DISCARD)
+#define REQ_NOIDLE             (1 << __REQ_NOIDLE)
+
+#define REQ_FAILFAST_MASK \
+       (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
+#define REQ_COMMON_MASK \
+       (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
+        REQ_META| REQ_DISCARD | REQ_NOIDLE)
+
+#define REQ_UNPLUG             (1 << __REQ_UNPLUG)
+#define REQ_RAHEAD             (1 << __REQ_RAHEAD)
+
+#define REQ_SORTED             (1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER                (1 << __REQ_SOFTBARRIER)
+#define REQ_FUA                        (1 << __REQ_FUA)
+#define REQ_NOMERGE            (1 << __REQ_NOMERGE)
+#define REQ_STARTED            (1 << __REQ_STARTED)
+#define REQ_DONTPREP           (1 << __REQ_DONTPREP)
+#define REQ_QUEUED             (1 << __REQ_QUEUED)
+#define REQ_ELVPRIV            (1 << __REQ_ELVPRIV)
+#define REQ_FAILED             (1 << __REQ_FAILED)
+#define REQ_QUIET              (1 << __REQ_QUIET)
+#define REQ_PREEMPT            (1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR      (1 << __REQ_ORDERED_COLOR)
+#define REQ_ALLOCED            (1 << __REQ_ALLOCED)
+#define REQ_COPY_USER          (1 << __REQ_COPY_USER)
+#define REQ_INTEGRITY          (1 << __REQ_INTEGRITY)
+#define REQ_FLUSH              (1 << __REQ_FLUSH)
+#define REQ_IO_STAT            (1 << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE                (1 << __REQ_MIXED_MERGE)
+
+#endif /* __LINUX_BLK_TYPES_H */
index 09a8402..89c855c 100644 (file)
@@ -60,7 +60,6 @@ enum rq_cmd_type_bits {
        REQ_TYPE_PM_RESUME,             /* resume request */
        REQ_TYPE_PM_SHUTDOWN,           /* shutdown request */
        REQ_TYPE_SPECIAL,               /* driver defined type */
-       REQ_TYPE_LINUX_BLOCK,           /* generic block layer message */
        /*
         * for ATA/ATAPI devices. this really doesn't belong here, ide should
         * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
@@ -70,84 +69,6 @@ enum rq_cmd_type_bits {
        REQ_TYPE_ATA_PC,
 };
 
-/*
- * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
- * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
- * SCSI cdb.
- *
- * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
- * typically to differentiate REQ_TYPE_SPECIAL requests.
- *
- */
-enum {
-       REQ_LB_OP_EJECT = 0x40,         /* eject request */
-       REQ_LB_OP_FLUSH = 0x41,         /* flush request */
-};
-
-/*
- * request type modified bits. first four bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
-       __REQ_RW,               /* not set, read. set, write */
-       __REQ_FAILFAST_DEV,     /* no driver retries of device errors */
-       __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
-       __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
-       /* above flags must match BIO_RW_* */
-       __REQ_DISCARD,          /* request to discard sectors */
-       __REQ_SORTED,           /* elevator knows about this request */
-       __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
-       __REQ_HARDBARRIER,      /* may not be passed by drive either */
-       __REQ_FUA,              /* forced unit access */
-       __REQ_NOMERGE,          /* don't touch this for merging */
-       __REQ_STARTED,          /* drive already may have started this one */
-       __REQ_DONTPREP,         /* don't call prep for this one */
-       __REQ_QUEUED,           /* uses queueing */
-       __REQ_ELVPRIV,          /* elevator private data attached */
-       __REQ_FAILED,           /* set if the request failed */
-       __REQ_QUIET,            /* don't worry about errors */
-       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
-       __REQ_ORDERED_COLOR,    /* is before or after barrier */
-       __REQ_RW_SYNC,          /* request is sync (sync write or read) */
-       __REQ_ALLOCED,          /* request came from our alloc pool */
-       __REQ_RW_META,          /* metadata io request */
-       __REQ_COPY_USER,        /* contains copies of user pages */
-       __REQ_INTEGRITY,        /* integrity metadata has been remapped */
-       __REQ_NOIDLE,           /* Don't anticipate more IO after this one */
-       __REQ_IO_STAT,          /* account I/O stat */
-       __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
-       __REQ_NR_BITS,          /* stops here */
-};
-
-#define REQ_RW         (1 << __REQ_RW)
-#define REQ_FAILFAST_DEV       (1 << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER    (1 << __REQ_FAILFAST_DRIVER)
-#define REQ_DISCARD    (1 << __REQ_DISCARD)
-#define REQ_SORTED     (1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER        (1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER        (1 << __REQ_HARDBARRIER)
-#define REQ_FUA                (1 << __REQ_FUA)
-#define REQ_NOMERGE    (1 << __REQ_NOMERGE)
-#define REQ_STARTED    (1 << __REQ_STARTED)
-#define REQ_DONTPREP   (1 << __REQ_DONTPREP)
-#define REQ_QUEUED     (1 << __REQ_QUEUED)
-#define REQ_ELVPRIV    (1 << __REQ_ELVPRIV)
-#define REQ_FAILED     (1 << __REQ_FAILED)
-#define REQ_QUIET      (1 << __REQ_QUIET)
-#define REQ_PREEMPT    (1 << __REQ_PREEMPT)
-#define REQ_ORDERED_COLOR      (1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC    (1 << __REQ_RW_SYNC)
-#define REQ_ALLOCED    (1 << __REQ_ALLOCED)
-#define REQ_RW_META    (1 << __REQ_RW_META)
-#define REQ_COPY_USER  (1 << __REQ_COPY_USER)
-#define REQ_INTEGRITY  (1 << __REQ_INTEGRITY)
-#define REQ_NOIDLE     (1 << __REQ_NOIDLE)
-#define REQ_IO_STAT    (1 << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE        (1 << __REQ_MIXED_MERGE)
-
-#define REQ_FAILFAST_MASK      (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
-                                REQ_FAILFAST_DRIVER)
-
 #define BLK_MAX_CDB    16
 
 /*
@@ -264,6 +185,7 @@ struct request_pm_state
 typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
+typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
 
 struct bio_vec;
@@ -275,7 +197,6 @@ struct bvec_merge_data {
 };
 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
                             struct bio_vec *);
-typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
@@ -346,9 +267,9 @@ struct request_queue
        request_fn_proc         *request_fn;
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
+       unprep_rq_fn            *unprep_rq_fn;
        unplug_fn               *unplug_fn;
        merge_bvec_fn           *merge_bvec_fn;
-       prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
@@ -467,11 +388,13 @@ struct request_queue
 #define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
 #define QUEUE_FLAG_DISCARD     16      /* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   17      /* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM  18      /* Contributes to random pool */
 
 #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                 (1 << QUEUE_FLAG_CLUSTER) |            \
                                 (1 << QUEUE_FLAG_STACKABLE)    |       \
-                                (1 << QUEUE_FLAG_SAME_COMP))
+                                (1 << QUEUE_FLAG_SAME_COMP)    |       \
+                                (1 << QUEUE_FLAG_ADD_RANDOM))
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -596,38 +519,26 @@ enum {
        test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)    test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_io_stat(q)   test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
+#define blk_queue_add_random(q)        test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_flushing(q)  ((q)->ordseq)
 #define blk_queue_stackable(q) \
        test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
 #define blk_queue_discard(q)   test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
-#define blk_fs_request(rq)     ((rq)->cmd_type == REQ_TYPE_FS)
-#define blk_pc_request(rq)     ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
-#define blk_special_request(rq)        ((rq)->cmd_type == REQ_TYPE_SPECIAL)
-#define blk_sense_request(rq)  ((rq)->cmd_type == REQ_TYPE_SENSE)
-
-#define blk_failfast_dev(rq)   ((rq)->cmd_flags & REQ_FAILFAST_DEV)
-#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT)
-#define blk_failfast_driver(rq)        ((rq)->cmd_flags & REQ_FAILFAST_DRIVER)
-#define blk_noretry_request(rq)        (blk_failfast_dev(rq) ||        \
-                                blk_failfast_transport(rq) ||  \
-                                blk_failfast_driver(rq))
-#define blk_rq_started(rq)     ((rq)->cmd_flags & REQ_STARTED)
-#define blk_rq_io_stat(rq)     ((rq)->cmd_flags & REQ_IO_STAT)
-#define blk_rq_quiet(rq)       ((rq)->cmd_flags & REQ_QUIET)
-
-#define blk_account_rq(rq)     (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 
-
-#define blk_pm_suspend_request(rq)     ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
-#define blk_pm_resume_request(rq)      ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
+#define blk_noretry_request(rq) \
+       ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
+                            REQ_FAILFAST_DRIVER))
+
+#define blk_account_rq(rq) \
+       (((rq)->cmd_flags & REQ_STARTED) && \
+        ((rq)->cmd_type == REQ_TYPE_FS || \
+         ((rq)->cmd_flags & REQ_DISCARD)))
+
 #define blk_pm_request(rq)     \
-       (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
+       ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
+        (rq)->cmd_type == REQ_TYPE_PM_RESUME)
 
 #define blk_rq_cpu_valid(rq)   ((rq)->cpu != -1)
-#define blk_sorted_rq(rq)      ((rq)->cmd_flags & REQ_SORTED)
-#define blk_barrier_rq(rq)     ((rq)->cmd_flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq)         ((rq)->cmd_flags & REQ_FUA)
-#define blk_discard_rq(rq)     ((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)                ((rq)->next_rq != NULL)
 /* rq->queuelist of dequeued request must be list_empty() */
 #define blk_queued_rq(rq)      (!list_empty(&(rq)->queuelist))
@@ -641,7 +552,7 @@ enum {
  */
 static inline bool rw_is_sync(unsigned int rw_flags)
 {
-       return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
+       return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC);
 }
 
 static inline bool rq_is_sync(struct request *rq)
@@ -649,9 +560,6 @@ static inline bool rq_is_sync(struct request *rq)
        return rw_is_sync(rq->cmd_flags);
 }
 
-#define rq_is_meta(rq)         ((rq)->cmd_flags & REQ_RW_META)
-#define rq_noidle(rq)          ((rq)->cmd_flags & REQ_NOIDLE)
-
 static inline int blk_queue_full(struct request_queue *q, int sync)
 {
        if (sync)
@@ -684,7 +592,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync)
        (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)       \
        (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
-        (blk_discard_rq(rq) || blk_fs_request((rq))))
+        (((rq)->cmd_flags & REQ_DISCARD) || \
+         (rq)->cmd_type == REQ_TYPE_FS))
 
 /*
  * q->prep_rq_fn return values
@@ -709,7 +618,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_BOUNCE_HIGH                -1ULL
 #endif
 #define BLK_BOUNCE_ANY         (-1ULL)
-#define BLK_BOUNCE_ISA         (ISA_DMA_THRESHOLD)
+#define BLK_BOUNCE_ISA         (DMA_BIT_MASK(24))
 
 /*
  * default timeout for SG_IO if none specified
@@ -781,6 +690,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *,
                                        gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern void blk_add_request_payload(struct request *rq, struct page *page,
+               unsigned int len);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -915,6 +826,7 @@ extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
+extern void blk_unprep_request(struct request *);
 
 /*
  * Access functions for manipulating queue properties
@@ -959,6 +871,7 @@ extern int blk_queue_dma_drain(struct request_queue *q,
 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
+extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
@@ -966,7 +879,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
+extern int blk_queue_ordered(struct request_queue *, unsigned);
 extern bool blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);
@@ -1020,7 +933,7 @@ static inline int sb_issue_discard(struct super_block *sb,
 {
        block <<= (sb->s_blocksize_bits - 9);
        nr_blocks <<= (sb->s_blocksize_bits - 9);
-       return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
+       return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
                                   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
@@ -1333,7 +1246,6 @@ static inline int blk_integrity_rq(struct request *rq)
 struct block_device_operations {
        int (*open) (struct block_device *, fmode_t);
        int (*release) (struct gendisk *, fmode_t);
-       int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*direct_access) (struct block_device *, sector_t,
index 416bf62..3395cf7 100644 (file)
@@ -5,6 +5,7 @@
 #ifdef __KERNEL__
 #include <linux/blkdev.h>
 #include <linux/relay.h>
+#include <linux/compat.h>
 #endif
 
 /*
@@ -220,11 +221,26 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
+#ifdef CONFIG_COMPAT
+
+struct compat_blk_user_trace_setup {
+       char name[32];
+       u16 act_mask;
+       u32 buf_size;
+       u32 buf_nr;
+       compat_u64 start_lba;
+       compat_u64 end_lba;
+       u32 pid;
+};
+#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup)
+
+#endif
+
 #if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
 
 static inline int blk_cmd_buf_len(struct request *rq)
 {
-       return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+       return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1;
 }
 
 extern void blk_dump_cmd(char *buf, struct request *rq);
index 8859e2e..284b520 100644 (file)
@@ -86,9 +86,9 @@ struct upc_req {
        wait_queue_head_t   uc_sleep;   /* process' wait queue */
 };
 
-#define REQ_ASYNC  0x1
-#define REQ_READ   0x2
-#define REQ_WRITE  0x4
-#define REQ_ABORT  0x8
+#define CODA_REQ_ASYNC  0x1
+#define CODA_REQ_READ   0x2
+#define CODA_REQ_WRITE  0x4
+#define CODA_REQ_ABORT  0x8
 
 #endif
index b8d2516..479ee3a 100644 (file)
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8"
+#define REL_VERSION "8.3.8.1"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 94
index ce77a74..5f04281 100644 (file)
@@ -78,10 +78,11 @@ NL_PACKET(syncer_conf, 8,
        NL_INTEGER(     30,     T_MAY_IGNORE,   rate)
        NL_INTEGER(     31,     T_MAY_IGNORE,   after)
        NL_INTEGER(     32,     T_MAY_IGNORE,   al_extents)
-       NL_INTEGER(     71,     T_MAY_IGNORE,   dp_volume)
-       NL_INTEGER(     72,     T_MAY_IGNORE,   dp_interval)
-       NL_INTEGER(     73,     T_MAY_IGNORE,   throttle_th)
-       NL_INTEGER(     74,     T_MAY_IGNORE,   hold_off_th)
+/*     NL_INTEGER(     71,     T_MAY_IGNORE,   dp_volume)
+ *     NL_INTEGER(     72,     T_MAY_IGNORE,   dp_interval)
+ *     NL_INTEGER(     73,     T_MAY_IGNORE,   throttle_th)
+ *     NL_INTEGER(     74,     T_MAY_IGNORE,   hold_off_th)
+ * feature will be reimplemented differently with 8.3.9 */
        NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
        NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
        NL_STRING(      64,     T_MAY_IGNORE,   csums_alg,      SHARED_SECRET_MAX)
index a8ccf85..1542e0e 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/blk_types.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -121,7 +122,7 @@ struct inodes_stat_t {
  *                     immediately wait on this read without caring about
  *                     unplugging.
  * READA               Used for read-ahead operations. Lower priority, and the
- *                      block layer could (in theory) choose to ignore this
+ *                     block layer could (in theory) choose to ignore this
  *                     request if it runs into resource problems.
  * WRITE               A normal async write. Device will be plugged.
  * SWRITE              Like WRITE, but a special case for ll_rw_block() that
@@ -140,7 +141,7 @@ struct inodes_stat_t {
  * SWRITE_SYNC
  * SWRITE_SYNC_PLUG    Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
  *                     See SWRITE.
- * WRITE_BARRIER       Like WRITE, but tells the block layer that all
+ * WRITE_BARRIER       Like WRITE_SYNC, but tells the block layer that all
  *                     previously submitted writes must be safely on storage
  *                     before this one is started. Also guarantees that when
  *                     this write is complete, it itself is also safely on
@@ -148,29 +149,31 @@ struct inodes_stat_t {
  *                     of this IO.
  *
  */
-#define RW_MASK                1
-#define RWA_MASK       2
-#define READ 0
-#define WRITE 1
-#define READA 2                /* read-ahead  - don't block if no resources */
-#define SWRITE 3       /* for ll_rw_block() - wait for buffer lock */
-#define READ_SYNC      (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
-#define READ_META      (READ | (1 << BIO_RW_META))
-#define WRITE_SYNC_PLUG        (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
-#define WRITE_SYNC     (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_ODIRECT_PLUG     (WRITE | (1 << BIO_RW_SYNCIO))
-#define WRITE_META     (WRITE | (1 << BIO_RW_META))
-#define SWRITE_SYNC_PLUG       \
-                       (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
-#define SWRITE_SYNC    (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_BARRIER  (WRITE | (1 << BIO_RW_BARRIER))
+#define RW_MASK                        REQ_WRITE
+#define RWA_MASK               REQ_RAHEAD
+
+#define READ                   0
+#define WRITE                  RW_MASK
+#define READA                  RWA_MASK
+#define SWRITE                 (WRITE | READA)
+
+#define READ_SYNC              (READ | REQ_SYNC | REQ_UNPLUG)
+#define READ_META              (READ | REQ_META)
+#define WRITE_SYNC_PLUG                (WRITE | REQ_SYNC | REQ_NOIDLE)
+#define WRITE_SYNC             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
+#define WRITE_ODIRECT_PLUG     (WRITE | REQ_SYNC)
+#define WRITE_META             (WRITE | REQ_META)
+#define WRITE_BARRIER          (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+                                REQ_HARDBARRIER)
+#define SWRITE_SYNC_PLUG       (SWRITE | REQ_SYNC | REQ_NOIDLE)
+#define SWRITE_SYNC            (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
 
 /*
  * These aren't really reads or writes, they pass down information about
  * parts of device that are now unused by the file system.
  */
-#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD))
-#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER      (WRITE | REQ_DISCARD)
+#define DISCARD_BARRIER                (WRITE | REQ_DISCARD | REQ_HARDBARRIER)
 
 #define SEL_IN         1
 #define SEL_OUT                2
@@ -2196,7 +2199,6 @@ static inline void insert_inode_hash(struct inode *inode) {
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 #ifdef CONFIG_BLOCK
-struct bio;
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
 #endif
@@ -2263,7 +2265,6 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
 #endif
 
 #ifdef CONFIG_BLOCK
-struct bio;
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
                            loff_t file_offset);
 
index d870a91..d8ce278 100644 (file)
@@ -25,8 +25,10 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
 
        TP_fast_assign(
                __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-               __entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-               __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+               __entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq);
+               __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq);
                __entry->errors    = rq->errors;
 
                blk_fill_rwbs_rq(__entry->rwbs, rq);
@@ -109,9 +111,12 @@ DECLARE_EVENT_CLASS(block_rq,
 
        TP_fast_assign(
                __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-               __entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-               __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-               __entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+               __entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq);
+               __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq);
+               __entry->bytes     = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       blk_rq_bytes(rq) : 0;
 
                blk_fill_rwbs_rq(__entry->rwbs, rq);
                blk_dump_cmd(__get_str(cmd), rq);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644 (file)
index 0000000..f345f66
--- /dev/null
@@ -0,0 +1,159 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/device.h>
+#include <linux/writeback.h>
+
+struct wb_writeback_work;
+
+DECLARE_EVENT_CLASS(writeback_work_class,
+       TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
+       TP_ARGS(bdi, work),
+       TP_STRUCT__entry(
+               __array(char, name, 32)
+               __field(long, nr_pages)
+               __field(dev_t, sb_dev)
+               __field(int, sync_mode)
+               __field(int, for_kupdate)
+               __field(int, range_cyclic)
+               __field(int, for_background)
+       ),
+       TP_fast_assign(
+               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               __entry->nr_pages = work->nr_pages;
+               __entry->sb_dev = work->sb ? work->sb->s_dev : 0;
+               __entry->sync_mode = work->sync_mode;
+               __entry->for_kupdate = work->for_kupdate;
+               __entry->range_cyclic = work->range_cyclic;
+               __entry->for_background = work->for_background;
+       ),
+       TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+                 "kupdate=%d range_cyclic=%d background=%d",
+                 __entry->name,
+                 MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
+                 __entry->nr_pages,
+                 __entry->sync_mode,
+                 __entry->for_kupdate,
+                 __entry->range_cyclic,
+                 __entry->for_background
+       )
+);
+#define DEFINE_WRITEBACK_WORK_EVENT(name) \
+DEFINE_EVENT(writeback_work_class, name, \
+       TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
+       TP_ARGS(bdi, work))
+DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
+
+TRACE_EVENT(writeback_pages_written,
+       TP_PROTO(long pages_written),
+       TP_ARGS(pages_written),
+       TP_STRUCT__entry(
+               __field(long,           pages)
+       ),
+       TP_fast_assign(
+               __entry->pages          = pages_written;
+       ),
+       TP_printk("%ld", __entry->pages)
+);
+
+DECLARE_EVENT_CLASS(writeback_class,
+       TP_PROTO(struct backing_dev_info *bdi),
+       TP_ARGS(bdi),
+       TP_STRUCT__entry(
+               __array(char, name, 32)
+       ),
+       TP_fast_assign(
+               strncpy(__entry->name, dev_name(bdi->dev), 32);
+       ),
+       TP_printk("bdi %s",
+                 __entry->name
+       )
+);
+#define DEFINE_WRITEBACK_EVENT(name) \
+DEFINE_EVENT(writeback_class, name, \
+       TP_PROTO(struct backing_dev_info *bdi), \
+       TP_ARGS(bdi))
+
+DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
+DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
+DEFINE_WRITEBACK_EVENT(writeback_thread_start);
+DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+
+DECLARE_EVENT_CLASS(wbc_class,
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
+       TP_ARGS(wbc, bdi),
+       TP_STRUCT__entry(
+               __array(char, name, 32)
+               __field(long, nr_to_write)
+               __field(long, pages_skipped)
+               __field(int, sync_mode)
+               __field(int, nonblocking)
+               __field(int, encountered_congestion)
+               __field(int, for_kupdate)
+               __field(int, for_background)
+               __field(int, for_reclaim)
+               __field(int, range_cyclic)
+               __field(int, more_io)
+               __field(unsigned long, older_than_this)
+               __field(long, range_start)
+               __field(long, range_end)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->pages_skipped  = wbc->pages_skipped;
+               __entry->sync_mode      = wbc->sync_mode;
+               __entry->for_kupdate    = wbc->for_kupdate;
+               __entry->for_background = wbc->for_background;
+               __entry->for_reclaim    = wbc->for_reclaim;
+               __entry->range_cyclic   = wbc->range_cyclic;
+               __entry->more_io        = wbc->more_io;
+               __entry->older_than_this = wbc->older_than_this ?
+                                               *wbc->older_than_this : 0;
+               __entry->range_start    = (long)wbc->range_start;
+               __entry->range_end      = (long)wbc->range_end;
+       ),
+
+       TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+               "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+               "start=0x%lx end=0x%lx",
+               __entry->name,
+               __entry->nr_to_write,
+               __entry->pages_skipped,
+               __entry->sync_mode,
+               __entry->for_kupdate,
+               __entry->for_background,
+               __entry->for_reclaim,
+               __entry->range_cyclic,
+               __entry->more_io,
+               __entry->older_than_this,
+               __entry->range_start,
+               __entry->range_end)
+)
+
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
+       TP_ARGS(wbc, bdi))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+DEFINE_WBC_EVENT(wbc_writepage);
+
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 97024fd..83bbc7c 100644 (file)
@@ -28,7 +28,7 @@
 static int submit(int rw, struct block_device *bdev, sector_t sector,
                struct page *page, struct bio **bio_chain)
 {
-       const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+       const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG;
        struct bio *bio;
 
        bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
index 638711c..82499a5 100644 (file)
@@ -169,9 +169,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
                                 BLK_TC_ACT(BLK_TC_WRITE) };
 
+#define BLK_TC_HARDBARRIER     BLK_TC_BARRIER
+#define BLK_TC_RAHEAD          BLK_TC_AHEAD
+
 /* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
-         (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
+#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
+         (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -194,9 +197,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
                return;
 
        what |= ddir_act[rw & WRITE];
-       what |= MASK_TC_BIT(rw, BARRIER);
-       what |= MASK_TC_BIT(rw, SYNCIO);
-       what |= MASK_TC_BIT(rw, AHEAD);
+       what |= MASK_TC_BIT(rw, HARDBARRIER);
+       what |= MASK_TC_BIT(rw, SYNC);
+       what |= MASK_TC_BIT(rw, RAHEAD);
        what |= MASK_TC_BIT(rw, META);
        what |= MASK_TC_BIT(rw, DISCARD);
 
@@ -549,6 +552,41 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 }
 EXPORT_SYMBOL_GPL(blk_trace_setup);
 
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+static int compat_blk_trace_setup(struct request_queue *q, char *name,
+                                 dev_t dev, struct block_device *bdev,
+                                 char __user *arg)
+{
+       struct blk_user_trace_setup buts;
+       struct compat_blk_user_trace_setup cbuts;
+       int ret;
+
+       if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
+               return -EFAULT;
+
+       buts = (struct blk_user_trace_setup) {
+               .act_mask = cbuts.act_mask,
+               .buf_size = cbuts.buf_size,
+               .buf_nr = cbuts.buf_nr,
+               .start_lba = cbuts.start_lba,
+               .end_lba = cbuts.end_lba,
+               .pid = cbuts.pid,
+       };
+       memcpy(&buts.name, &cbuts.name, 32);
+
+       ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+       if (ret)
+               return ret;
+
+       if (copy_to_user(arg, &buts.name, 32)) {
+               blk_trace_remove(q);
+               return -EFAULT;
+       }
+
+       return 0;
+}
+#endif
+
 int blk_trace_startstop(struct request_queue *q, int start)
 {
        int ret;
@@ -601,6 +639,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
        if (!q)
                return -ENXIO;
 
+       lock_kernel();
        mutex_lock(&bdev->bd_mutex);
 
        switch (cmd) {
@@ -608,6 +647,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
                bdevname(bdev, b);
                ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
                break;
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+       case BLKTRACESETUP32:
+               bdevname(bdev, b);
+               ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+               break;
+#endif
        case BLKTRACESTART:
                start = 1;
        case BLKTRACESTOP:
@@ -622,6 +667,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
        }
 
        mutex_unlock(&bdev->bd_mutex);
+       unlock_kernel();
        return ret;
 }
 
@@ -661,10 +707,10 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
        if (likely(!bt))
                return;
 
-       if (blk_discard_rq(rq))
-               rw |= (1 << BIO_RW_DISCARD);
+       if (rq->cmd_flags & REQ_DISCARD)
+               rw |= REQ_DISCARD;
 
-       if (blk_pc_request(rq)) {
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                what |= BLK_TC_ACT(BLK_TC_PC);
                __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
                                what, rq->errors, rq->cmd_len, rq->cmd);
@@ -925,7 +971,7 @@ void blk_add_driver_data(struct request_queue *q,
        if (likely(!bt))
                return;
 
-       if (blk_pc_request(rq))
+       if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
                __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
                                BLK_TA_DRV_DATA, rq->errors, len, data);
        else
@@ -1730,7 +1776,7 @@ void blk_dump_cmd(char *buf, struct request *rq)
        int len = rq->cmd_len;
        unsigned char *cmd = rq->cmd;
 
-       if (!blk_pc_request(rq)) {
+       if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
                buf[0] = '\0';
                return;
        }
@@ -1755,20 +1801,20 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 
        if (rw & WRITE)
                rwbs[i++] = 'W';
-       else if (rw & 1 << BIO_RW_DISCARD)
+       else if (rw & REQ_DISCARD)
                rwbs[i++] = 'D';
        else if (bytes)
                rwbs[i++] = 'R';
        else
                rwbs[i++] = 'N';
 
-       if (rw & 1 << BIO_RW_AHEAD)
+       if (rw & REQ_RAHEAD)
                rwbs[i++] = 'A';
-       if (rw & 1 << BIO_RW_BARRIER)
+       if (rw & REQ_HARDBARRIER)
                rwbs[i++] = 'B';
-       if (rw & 1 << BIO_RW_SYNCIO)
+       if (rw & REQ_SYNC)
                rwbs[i++] = 'S';
-       if (rw & 1 << BIO_RW_META)
+       if (rw & REQ_META)
                rwbs[i++] = 'M';
 
        rwbs[i] = '\0';
@@ -1779,8 +1825,8 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
        int rw = rq->cmd_flags & 0x03;
        int bytes;
 
-       if (blk_discard_rq(rq))
-               rw |= (1 << BIO_RW_DISCARD);
+       if (rq->cmd_flags & REQ_DISCARD)
+               rw |= REQ_DISCARD;
 
        bytes = blk_rq_bytes(rq);
 
index f9fd3dd..08d3575 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -49,8 +50,6 @@ static struct timer_list sync_supers_timer;
 static int bdi_sync_supers(void *);
 static void sync_supers_timer_fn(unsigned long);
 
-static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
-
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -65,28 +64,21 @@ static void bdi_debug_init(void)
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
        struct backing_dev_info *bdi = m->private;
-       struct bdi_writeback *wb;
+       struct bdi_writeback *wb = &bdi->wb;
        unsigned long background_thresh;
        unsigned long dirty_thresh;
        unsigned long bdi_thresh;
        unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
        struct inode *inode;
 
-       /*
-        * inode lock is enough here, the bdi->wb_list is protected by
-        * RCU on the reader side
-        */
        nr_wb = nr_dirty = nr_io = nr_more_io = 0;
        spin_lock(&inode_lock);
-       list_for_each_entry(wb, &bdi->wb_list, list) {
-               nr_wb++;
-               list_for_each_entry(inode, &wb->b_dirty, i_list)
-                       nr_dirty++;
-               list_for_each_entry(inode, &wb->b_io, i_list)
-                       nr_io++;
-               list_for_each_entry(inode, &wb->b_more_io, i_list)
-                       nr_more_io++;
-       }
+       list_for_each_entry(inode, &wb->b_dirty, i_list)
+               nr_dirty++;
+       list_for_each_entry(inode, &wb->b_io, i_list)
+               nr_io++;
+       list_for_each_entry(inode, &wb->b_more_io, i_list)
+               nr_more_io++;
        spin_unlock(&inode_lock);
 
        get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
@@ -98,19 +90,16 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   "BdiDirtyThresh:   %8lu kB\n"
                   "DirtyThresh:      %8lu kB\n"
                   "BackgroundThresh: %8lu kB\n"
-                  "WritebackThreads: %8lu\n"
                   "b_dirty:          %8lu\n"
                   "b_io:             %8lu\n"
                   "b_more_io:        %8lu\n"
                   "bdi_list:         %8u\n"
-                  "state:            %8lx\n"
-                  "wb_list:          %8u\n",
+                  "state:            %8lx\n",
                   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
                   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
                   K(bdi_thresh), K(dirty_thresh),
-                  K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
-                  !list_empty(&bdi->bdi_list), bdi->state,
-                  !list_empty(&bdi->wb_list));
+                  K(background_thresh), nr_dirty, nr_io, nr_more_io,
+                  !list_empty(&bdi->bdi_list), bdi->state);
 #undef K
 
        return 0;
@@ -247,7 +236,6 @@ static int __init default_bdi_init(void)
        sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
        BUG_ON(IS_ERR(sync_supers_tsk));
 
-       init_timer(&sync_supers_timer);
        setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
        bdi_arm_supers_timer();
 
@@ -259,77 +247,6 @@ static int __init default_bdi_init(void)
 }
 subsys_initcall(default_bdi_init);
 
-static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
-{
-       memset(wb, 0, sizeof(*wb));
-
-       wb->bdi = bdi;
-       wb->last_old_flush = jiffies;
-       INIT_LIST_HEAD(&wb->b_dirty);
-       INIT_LIST_HEAD(&wb->b_io);
-       INIT_LIST_HEAD(&wb->b_more_io);
-}
-
-static void bdi_task_init(struct backing_dev_info *bdi,
-                         struct bdi_writeback *wb)
-{
-       struct task_struct *tsk = current;
-
-       spin_lock(&bdi->wb_lock);
-       list_add_tail_rcu(&wb->list, &bdi->wb_list);
-       spin_unlock(&bdi->wb_lock);
-
-       tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
-       set_freezable();
-
-       /*
-        * Our parent may run at a different priority, just set us to normal
-        */
-       set_user_nice(tsk, 0);
-}
-
-static int bdi_start_fn(void *ptr)
-{
-       struct bdi_writeback *wb = ptr;
-       struct backing_dev_info *bdi = wb->bdi;
-       int ret;
-
-       /*
-        * Add us to the active bdi_list
-        */
-       spin_lock_bh(&bdi_lock);
-       list_add_rcu(&bdi->bdi_list, &bdi_list);
-       spin_unlock_bh(&bdi_lock);
-
-       bdi_task_init(bdi, wb);
-
-       /*
-        * Clear pending bit and wakeup anybody waiting to tear us down
-        */
-       clear_bit(BDI_pending, &bdi->state);
-       smp_mb__after_clear_bit();
-       wake_up_bit(&bdi->state, BDI_pending);
-
-       ret = bdi_writeback_task(wb);
-
-       /*
-        * Remove us from the list
-        */
-       spin_lock(&bdi->wb_lock);
-       list_del_rcu(&wb->list);
-       spin_unlock(&bdi->wb_lock);
-
-       /*
-        * Flush any work that raced with us exiting. No new work
-        * will be added, since this bdi isn't discoverable anymore.
-        */
-       if (!list_empty(&bdi->work_list))
-               wb_do_writeback(wb, 1);
-
-       wb->task = NULL;
-       return ret;
-}
-
 int bdi_has_dirty_io(struct backing_dev_info *bdi)
 {
        return wb_has_dirty_io(&bdi->wb);
@@ -348,10 +265,10 @@ static void bdi_flush_io(struct backing_dev_info *bdi)
 }
 
 /*
- * kupdated() used to do this. We cannot do it from the bdi_forker_task()
+ * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
  * or we risk deadlocking on ->s_umount. The longer term solution would be
  * to implement sync_supers_bdi() or similar and simply do it from the
- * bdi writeback tasks individually.
+ * bdi writeback thread individually.
  */
 static int bdi_sync_supers(void *unused)
 {
@@ -387,144 +304,198 @@ static void sync_supers_timer_fn(unsigned long unused)
        bdi_arm_supers_timer();
 }
 
-static int bdi_forker_task(void *ptr)
+static void wakeup_timer_fn(unsigned long data)
+{
+       struct backing_dev_info *bdi = (struct backing_dev_info *)data;
+
+       spin_lock_bh(&bdi->wb_lock);
+       if (bdi->wb.task) {
+               trace_writeback_wake_thread(bdi);
+               wake_up_process(bdi->wb.task);
+       } else {
+               /*
+                * When bdi tasks are inactive for a long time, they are killed.
+                * In this case we have to wake-up the forker thread which
+                * should create and run the bdi thread.
+                */
+               trace_writeback_wake_forker_thread(bdi);
+               wake_up_process(default_backing_dev_info.wb.task);
+       }
+       spin_unlock_bh(&bdi->wb_lock);
+}
+
+/*
+ * This function is used when the first inode for this bdi is marked dirty. It
+ * wakes-up the corresponding bdi thread which should then take care of the
+ * periodic background write-out of dirty inodes. Since the write-out would
+ * start only 'dirty_writeback_interval' centisecs from now anyway, we just
+ * set up a timer which wakes the bdi thread up later.
+ *
+ * Note, we wouldn't bother setting up the timer, but this function is on the
+ * fast-path (used by '__mark_inode_dirty()'), so we save a few context switches
+ * by delaying the wake-up.
+ */
+void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
+{
+       unsigned long timeout;
+
+       timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+       mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
+}
+
+/*
+ * Calculate the longest interval (jiffies) bdi threads are allowed to be
+ * inactive.
+ */
+static unsigned long bdi_longest_inactive(void)
+{
+       unsigned long interval;
+
+       interval = msecs_to_jiffies(dirty_writeback_interval * 10);
+       return max(5UL * 60 * HZ, interval);
+}
+
+static int bdi_forker_thread(void *ptr)
 {
        struct bdi_writeback *me = ptr;
 
-       bdi_task_init(me->bdi, me);
+       current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+       set_freezable();
+
+       /*
+        * Our parent may run at a different priority, just set us to normal
+        */
+       set_user_nice(current, 0);
 
        for (;;) {
-               struct backing_dev_info *bdi, *tmp;
-               struct bdi_writeback *wb;
+               struct task_struct *task = NULL;
+               struct backing_dev_info *bdi;
+               enum {
+                       NO_ACTION,   /* Nothing to do */
+                       FORK_THREAD, /* Fork bdi thread */
+                       KILL_THREAD, /* Kill inactive bdi thread */
+               } action = NO_ACTION;
 
                /*
                 * Temporary measure, we want to make sure we don't see
                 * dirty data on the default backing_dev_info
                 */
-               if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
+               if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
+                       del_timer(&me->wakeup_timer);
                        wb_do_writeback(me, 0);
+               }
 
                spin_lock_bh(&bdi_lock);
+               set_current_state(TASK_INTERRUPTIBLE);
 
-               /*
-                * Check if any existing bdi's have dirty data without
-                * a thread registered. If so, set that up.
-                */
-               list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
-                       if (bdi->wb.task)
-                               continue;
-                       if (list_empty(&bdi->work_list) &&
-                           !bdi_has_dirty_io(bdi))
+               list_for_each_entry(bdi, &bdi_list, bdi_list) {
+                       bool have_dirty_io;
+
+                       if (!bdi_cap_writeback_dirty(bdi) ||
+                            bdi_cap_flush_forker(bdi))
                                continue;
 
-                       bdi_add_default_flusher_task(bdi);
-               }
+                       WARN(!test_bit(BDI_registered, &bdi->state),
+                            "bdi %p/%s is not registered!\n", bdi, bdi->name);
 
-               set_current_state(TASK_INTERRUPTIBLE);
+                       have_dirty_io = !list_empty(&bdi->work_list) ||
+                                       wb_has_dirty_io(&bdi->wb);
 
-               if (list_empty(&bdi_pending_list)) {
-                       unsigned long wait;
+                       /*
+                        * If the bdi has work to do, but the thread does not
+                        * exist - create it.
+                        */
+                       if (!bdi->wb.task && have_dirty_io) {
+                               /*
+                                * Set the pending bit - if someone will try to
+                                * unregister this bdi - it'll wait on this bit.
+                                */
+                               set_bit(BDI_pending, &bdi->state);
+                               action = FORK_THREAD;
+                               break;
+                       }
+
+                       spin_lock(&bdi->wb_lock);
+
+                       /*
+                        * If there is no work to do and the bdi thread was
+                        * inactive long enough - kill it. The wb_lock is taken
+                        * to make sure no-one adds more work to this bdi and
+                        * wakes the bdi thread up.
+                        */
+                       if (bdi->wb.task && !have_dirty_io &&
+                           time_after(jiffies, bdi->wb.last_active +
+                                               bdi_longest_inactive())) {
+                               task = bdi->wb.task;
+                               bdi->wb.task = NULL;
+                               spin_unlock(&bdi->wb_lock);
+                               set_bit(BDI_pending, &bdi->state);
+                               action = KILL_THREAD;
+                               break;
+                       }
+                       spin_unlock(&bdi->wb_lock);
+               }
+               spin_unlock_bh(&bdi_lock);
 
-                       spin_unlock_bh(&bdi_lock);
-                       wait = msecs_to_jiffies(dirty_writeback_interval * 10);
-                       if (wait)
-                               schedule_timeout(wait);
+               /* Keep working if default bdi still has things to do */
+               if (!list_empty(&me->bdi->work_list))
+                       __set_current_state(TASK_RUNNING);
+
+               switch (action) {
+               case FORK_THREAD:
+                       __set_current_state(TASK_RUNNING);
+                       task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s",
+                                          dev_name(bdi->dev));
+                       if (IS_ERR(task)) {
+                               /*
+                                * If thread creation fails, force writeout of
+                                * the bdi from the forker thread.
+                                */
+                               bdi_flush_io(bdi);
+                       } else {
+                               /*
+                                * The spinlock makes sure we do not lose
+                                * wake-ups when racing with 'bdi_queue_work()'.
+                                */
+                               spin_lock_bh(&bdi->wb_lock);
+                               bdi->wb.task = task;
+                               spin_unlock_bh(&bdi->wb_lock);
+                       }
+                       break;
+
+               case KILL_THREAD:
+                       __set_current_state(TASK_RUNNING);
+                       kthread_stop(task);
+                       break;
+
+               case NO_ACTION:
+                       if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
+                               /*
+                                * There are no dirty data. The only thing we
+                                * should now care about is checking for
+                                * inactive bdi threads and killing them. Thus,
+                                * let's sleep for a longer time, save energy and
+                                * be friendly for battery-driven devices.
+                                */
+                               schedule_timeout(bdi_longest_inactive());
                        else
-                               schedule();
+                               schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
                        try_to_freeze();
+                       /* Back to the main loop */
                        continue;
                }
 
-               __set_current_state(TASK_RUNNING);
-
-               /*
-                * This is our real job - check for pending entries in
-                * bdi_pending_list, and create the tasks that got added
-                */
-               bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
-                                bdi_list);
-               list_del_init(&bdi->bdi_list);
-               spin_unlock_bh(&bdi_lock);
-
-               wb = &bdi->wb;
-               wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
-                                       dev_name(bdi->dev));
                /*
-                * If task creation fails, then readd the bdi to
-                * the pending list and force writeout of the bdi
-                * from this forker thread. That will free some memory
-                * and we can try again.
+                * Clear pending bit and wakeup anybody waiting to tear us down.
                 */
-               if (IS_ERR(wb->task)) {
-                       wb->task = NULL;
-
-                       /*
-                        * Add this 'bdi' to the back, so we get
-                        * a chance to flush other bdi's to free
-                        * memory.
-                        */
-                       spin_lock_bh(&bdi_lock);
-                       list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-                       spin_unlock_bh(&bdi_lock);
-
-                       bdi_flush_io(bdi);
-               }
+               clear_bit(BDI_pending, &bdi->state);
+               smp_mb__after_clear_bit();
+               wake_up_bit(&bdi->state, BDI_pending);
        }
 
        return 0;
 }
 
-static void bdi_add_to_pending(struct rcu_head *head)
-{
-       struct backing_dev_info *bdi;
-
-       bdi = container_of(head, struct backing_dev_info, rcu_head);
-       INIT_LIST_HEAD(&bdi->bdi_list);
-
-       spin_lock(&bdi_lock);
-       list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-       spin_unlock(&bdi_lock);
-
-       /*
-        * We are now on the pending list, wake up bdi_forker_task()
-        * to finish the job and add us back to the active bdi_list
-        */
-       wake_up_process(default_backing_dev_info.wb.task);
-}
-
-/*
- * Add the default flusher task that gets created for any bdi
- * that has dirty data pending writeout
- */
-void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
-{
-       if (!bdi_cap_writeback_dirty(bdi))
-               return;
-
-       if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
-               printk(KERN_ERR "bdi %p/%s is not registered!\n",
-                                                       bdi, bdi->name);
-               return;
-       }
-
-       /*
-        * Check with the helper whether to proceed adding a task. Will only
-        * abort if we two or more simultanous calls to
-        * bdi_add_default_flusher_task() occured, further additions will block
-        * waiting for previous additions to finish.
-        */
-       if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-               list_del_rcu(&bdi->bdi_list);
-
-               /*
-                * We must wait for the current RCU period to end before
-                * moving to the pending list. So schedule that operation
-                * from an RCU callback.
-                */
-               call_rcu(&bdi->rcu_head, bdi_add_to_pending);
-       }
-}
-
 /*
  * Remove bdi from bdi_list, and ensure that it is no longer visible
  */
@@ -541,23 +512,16 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                const char *fmt, ...)
 {
        va_list args;
-       int ret = 0;
        struct device *dev;
 
        if (bdi->dev)   /* The driver needs to use separate queues per device */
-               goto exit;
+               return 0;
 
        va_start(args, fmt);
        dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
        va_end(args);
-       if (IS_ERR(dev)) {
-               ret = PTR_ERR(dev);
-               goto exit;
-       }
-
-       spin_lock_bh(&bdi_lock);
-       list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
-       spin_unlock_bh(&bdi_lock);
+       if (IS_ERR(dev))
+               return PTR_ERR(dev);
 
        bdi->dev = dev;
 
@@ -569,21 +533,21 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
        if (bdi_cap_flush_forker(bdi)) {
                struct bdi_writeback *wb = &bdi->wb;
 
-               wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
+               wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
                                                dev_name(dev));
-               if (IS_ERR(wb->task)) {
-                       wb->task = NULL;
-                       ret = -ENOMEM;
-
-                       bdi_remove_from_list(bdi);
-                       goto exit;
-               }
+               if (IS_ERR(wb->task))
+                       return PTR_ERR(wb->task);
        }
 
        bdi_debug_register(bdi, dev_name(dev));
        set_bit(BDI_registered, &bdi->state);
-exit:
-       return ret;
+
+       spin_lock_bh(&bdi_lock);
+       list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+       spin_unlock_bh(&bdi_lock);
+
+       trace_writeback_bdi_register(bdi);
+       return 0;
 }
 EXPORT_SYMBOL(bdi_register);
 
@@ -598,31 +562,29 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-       struct bdi_writeback *wb;
-
        if (!bdi_cap_writeback_dirty(bdi))
                return;
 
        /*
-        * If setup is pending, wait for that to complete first
+        * Make sure nobody finds us on the bdi_list anymore
         */
-       wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-                       TASK_UNINTERRUPTIBLE);
+       bdi_remove_from_list(bdi);
 
        /*
-        * Make sure nobody finds us on the bdi_list anymore
+        * If setup is pending, wait for that to complete first
         */
-       bdi_remove_from_list(bdi);
+       wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+                       TASK_UNINTERRUPTIBLE);
 
        /*
-        * Finally, kill the kernel threads. We don't need to be RCU
+        * Finally, kill the kernel thread. We don't need to be RCU
         * safe anymore, since the bdi is gone from visibility. Force
         * unfreeze of the thread before calling kthread_stop(), otherwise
         * it would never exit if it is currently stuck in the refrigerator.
         */
-       list_for_each_entry(wb, &bdi->wb_list, list) {
-               thaw_process(wb->task);
-               kthread_stop(wb->task);
+       if (bdi->wb.task) {
+               thaw_process(bdi->wb.task);
+               kthread_stop(bdi->wb.task);
        }
 }
 
@@ -644,7 +606,9 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
        if (bdi->dev) {
+               trace_writeback_bdi_unregister(bdi);
                bdi_prune_sb(bdi);
+               del_timer_sync(&bdi->wb.wakeup_timer);
 
                if (!bdi_cap_flush_forker(bdi))
                        bdi_wb_shutdown(bdi);
@@ -655,6 +619,18 @@ void bdi_unregister(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_unregister);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+       memset(wb, 0, sizeof(*wb));
+
+       wb->bdi = bdi;
+       wb->last_old_flush = jiffies;
+       INIT_LIST_HEAD(&wb->b_dirty);
+       INIT_LIST_HEAD(&wb->b_io);
+       INIT_LIST_HEAD(&wb->b_more_io);
+       setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+}
+
 int bdi_init(struct backing_dev_info *bdi)
 {
        int i, err;
@@ -666,7 +642,6 @@ int bdi_init(struct backing_dev_info *bdi)
        bdi->max_prop_frac = PROP_FRAC_BASE;
        spin_lock_init(&bdi->wb_lock);
        INIT_LIST_HEAD(&bdi->bdi_list);
-       INIT_LIST_HEAD(&bdi->wb_list);
        INIT_LIST_HEAD(&bdi->work_list);
 
        bdi_wb_init(&bdi->wb, bdi);
index df8202e..0c6258b 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * threshold otherwise wait until the disk writes catch
                 * up.
                 */
+               trace_wbc_balance_dirty_start(&wbc, bdi);
                if (bdi_nr_reclaimable > bdi_thresh) {
                        writeback_inodes_wb(&bdi->wb, &wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
+                       trace_wbc_balance_dirty_written(&wbc, bdi);
                }
 
                /*
@@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
+               trace_wbc_balance_dirty_wait(&wbc, bdi);
                __set_current_state(TASK_INTERRUPTIBLE);
                io_schedule_timeout(pause);
 
@@ -962,6 +966,7 @@ continue_unlock:
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
+                       trace_wbc_writepage(wbc, mapping->backing_dev_info);
                        ret = (*writepage)(page, wbc, data);
                        if (unlikely(ret)) {
                                if (ret == AOP_WRITEPAGE_ACTIVATE) {
index 31a3b96..2dee975 100644 (file)
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
                goto out;
        }
        if (wbc->sync_mode == WB_SYNC_ALL)
-               rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+               rw |= REQ_SYNC | REQ_UNPLUG;
        count_vm_event(PSWPOUT);
        set_page_writeback(page);
        unlock_page(page);