2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
52 #include <scsi/scsi_transport_srp.h>
/*
 * Driver identification. PFX is the prefix passed to the shost_printk()
 * log messages in this file; DRV_VERSION and DRV_RELDATE feed the module
 * metadata macros below.
 */
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
/*
 * Scatter/gather and memory registration module parameters. All of them are
 * registered with mode 0444. The unsigned counters start out as zero here;
 * the defaults quoted in the description strings are presumably applied
 * elsewhere in the driver (TODO confirm).
 */
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
/* Consulted by srp_target_is_topspin(); a zero value disables the workarounds. */
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
/*
 * Timeout module parameters. The three timeouts are registered with
 * module_param_cb() and share the srp_tmo_ops callback table, which is only
 * forward-declared here and defined further down in this file.
 * srp_tmo_set() validates each new value against the other two timeouts.
 */
102 static const struct kernel_param_ops srp_tmo_ops;
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
/*
 * Number of RDMA channels per target (mode 0444). It is zero-initialized
 * here; the description string documents how the effective default is
 * derived.
 */
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
/*
 * Forward declarations of handlers that are referenced before their
 * definitions (device add/remove, CQ completion callbacks, CM event handler).
 */
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
/* File-scope objects: SRP transport template and the target removal workqueue. */
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
/* IB client descriptor; its remove callback is srp_remove_one(). */
142 static struct ib_client srp_client = {
145 .remove = srp_remove_one
/* SA client handle passed to ib_sa_path_rec_get() in srp_lookup_path(). */
148 static struct ib_sa_client srp_sa_client;
/*
 * srp_tmo_get() - format a timeout module parameter as text
 * @buffer: Output buffer that receives the formatted value.
 * @kp: Kernel parameter; kp->arg points at the int holding the timeout.
 *
 * Emits either the decimal timeout or the word off. The condition that
 * selects between the two is not shown here; presumably a negative value
 * means off (TODO confirm).
 *
 * Return: the value returned by sprintf().
 */
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
152 int tmo = *(int *)kp->arg;
155 return sprintf(buffer, "%d", tmo);
157 return sprintf(buffer, "off");
/*
 * srp_tmo_set() - parse, validate and store a timeout module parameter
 * @val: Textual value supplied by the user.
 * @kp: Kernel parameter; kp->arg identifies which timeout is being written.
 *
 * The text is parsed with srp_parse_tmo(). The new value is then checked
 * with srp_tmo_valid() in combination with the current values of the other
 * two timeouts, substituting the new value in the slot that kp->arg selects.
 * The parsed value is written through kp->arg.
 */
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
164 res = srp_parse_tmo(&tmo, val);
/* Validate the candidate in the position of the parameter being changed. */
168 if (kp->arg == &srp_reconnect_delay)
169 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
171 else if (kp->arg == &srp_fast_io_fail_tmo)
172 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
174 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
178 *(int *)kp->arg = tmo;
/*
 * Callback table shared by the reconnect_delay, fast_io_fail_tmo and
 * dev_loss_tmo module parameters (see the module_param_cb() registrations).
 */
184 static const struct kernel_param_ops srp_tmo_ops = {
/*
 * host_to_target() - return the SRP target port stored in a SCSI host
 * @host: SCSI host whose hostdata area is reinterpreted as the target port.
 */
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
191 return (struct srp_target_port *) host->hostdata;
/*
 * srp_target_info() - return the name of the target behind a SCSI host
 * @host: SCSI host to look up.
 *
 * Return: the target_name member of the associated srp_target_port.
 */
194 static const char *srp_target_info(struct Scsi_Host *host)
196 return host_to_target(host)->target_name;
/*
 * srp_target_is_topspin() - check whether Topspin/Cisco workarounds apply
 * @target: SRP target port.
 *
 * Return: nonzero when the topspin_workarounds module parameter is nonzero
 * and the leading three bytes of target->ioc_guid equal either the Topspin
 * or the Cisco OUI listed below.
 */
199 static int srp_target_is_topspin(struct srp_target_port *target)
201 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
/* Only the first sizeof(oui) == 3 bytes of the GUID are compared. */
204 return topspin_workarounds &&
205 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
/*
 * srp_alloc_iu() - allocate an information unit and DMA-map its buffer
 * @host: SRP host; its srp_dev->dev is the IB device used for the mapping.
 * @size: Size in bytes of the data buffer.
 * @direction: DMA direction, recorded in the IU.
 *
 * The descriptor is allocated with kmalloc() and the data buffer with
 * kzalloc(), both using gfp_mask, so the buffer starts out zeroed. The
 * buffer is mapped with ib_dma_map_single() and the mapping is checked with
 * ib_dma_mapping_error().
 */
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
211 enum dma_data_direction direction)
215 iu = kmalloc(sizeof *iu, gfp_mask);
219 iu->buf = kzalloc(size, gfp_mask);
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
229 iu->direction = direction;
/*
 * srp_free_iu() - release an information unit
 * @host: SRP host whose IB device owns the DMA mapping.
 * @iu: Information unit to release.
 *
 * Undoes the DMA mapping of the IU buffer using the address and size stored
 * in the IU.
 */
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
/*
 * srp_qp_event() - QP asynchronous event handler
 * @event: Event reported for the queue pair.
 * @context: Handler context; not referenced by the logging statement below.
 *
 * Logs the event name and number at debug level.
 */
252 static void srp_qp_event(struct ib_event *event, void *context)
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
/*
 * srp_init_qp() - move a freshly created QP to the INIT state
 * @target: SRP target port; supplies the IB device, port number and P_Key.
 *
 * Looks up the index of the target P_Key in the cached P_Key table of the
 * local port, then requests the IB_QPS_INIT state with remote read and
 * remote write access on that port via ib_modify_qp().
 */
258 static int srp_init_qp(struct srp_target_port *target,
261 struct ib_qp_attr *attr;
/* The attribute structure is heap-allocated rather than placed on the stack. */
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
280 ret = ib_modify_qp(qp, attr,
/*
 * srp_new_cm_id() - give a channel a fresh CM ID
 * @ch: SRP RDMA channel.
 *
 * Creates a new CM ID on the target's IB device, destroys the one the
 * channel held before and installs the new one. The channel path record is
 * then seeded with the target's source GID, original destination GID, P_Key
 * and service ID.
 *
 * Return: PTR_ERR() of the new CM ID if creating it failed.
 */
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
/* The old CM ID is released only after the new one was created. */
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
/*
 * srp_alloc_fmr_pool() - create an FMR pool for a target
 * @target: SRP target port.
 *
 * The pool holds one FMR per SCSI host queue slot (can_queue), uses a
 * dirty watermark of a quarter of the pool size, and takes its page count
 * and page shift from the SRP device limits. FMRs allow local write, remote
 * write and remote read access.
 *
 * Return: the result of ib_create_fmr_pool().
 */
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
/* Start from an all-zero parameter block; only the fields below are set. */
317 memset(&fmr_param, 0, sizeof(fmr_param));
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
331 * srp_destroy_fr_pool() - free the resources owned by a pool
332 * @pool: Fast registration pool to be destroyed.
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
337 struct srp_fr_desc *d;
/* Visit all pool->size descriptor slots of the pool->desc[] array in order. */
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
350 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
351 * @device: IB device to allocate fast registration descriptors for.
352 * @pd: Protection domain associated with the FR descriptors.
353 * @pool_size: Number of descriptors to allocate.
354 * @max_page_list_len: Maximum fast registration work request page list length.
/*
 * The pool header and its descriptor array are allocated as one zeroed
 * block. A memory region of type IB_MR_TYPE_MEM_REG is allocated for each
 * descriptor and the descriptor is appended to the pool free list. The
 * function also calls srp_destroy_fr_pool(), presumably on the failure path
 * to release a partially built pool (TODO confirm).
 */
356 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
357 struct ib_pd *pd, int pool_size,
358 int max_page_list_len)
360 struct srp_fr_pool *pool;
361 struct srp_fr_desc *d;
363 int i, ret = -EINVAL;
/* Single allocation: header followed by pool_size descriptors. */
368 pool = kzalloc(sizeof(struct srp_fr_pool) +
369 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
372 pool->size = pool_size;
373 pool->max_page_list_len = max_page_list_len;
374 spin_lock_init(&pool->lock);
375 INIT_LIST_HEAD(&pool->free_list);
377 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
378 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
385 list_add_tail(&d->entry, &pool->free_list);
392 srp_destroy_fr_pool(pool);
400 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
401 * @pool: Pool to obtain descriptor from.
/*
 * The free list is inspected with pool->lock held and interrupts disabled.
 * The descriptor pointer starts out as NULL and is only set when the free
 * list is non-empty, in which case the first free list entry is selected.
 */
403 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
405 struct srp_fr_desc *d = NULL;
408 spin_lock_irqsave(&pool->lock, flags);
409 if (!list_empty(&pool->free_list)) {
410 d = list_first_entry(&pool->free_list, typeof(*d), entry);
413 spin_unlock_irqrestore(&pool->lock, flags);
419 * srp_fr_pool_put() - put an FR descriptor back in the free list
420 * @pool: Pool the descriptor was allocated from.
421 * @desc: Pointer to an array of fast registration descriptor pointers.
422 * @n: Number of descriptors to put back.
424 * Note: The caller must already have queued an invalidation request for
425 * desc->mr->rkey before calling this function.
/*
 * All n descriptors are returned under a single acquisition of pool->lock.
 * list_add() inserts at the head of the free list, whereas
 * srp_create_fr_pool() appends with list_add_tail().
 */
427 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 spin_lock_irqsave(&pool->lock, flags);
434 for (i = 0; i < n; i++)
435 list_add(&desc[i]->entry, &pool->free_list);
436 spin_unlock_irqrestore(&pool->lock, flags);
/*
 * srp_alloc_fr_pool() - create a fast registration pool for a target
 * @target: SRP target port.
 *
 * Thin wrapper around srp_create_fr_pool(): one descriptor per SCSI host
 * queue slot (can_queue), with the page list length limited to the device's
 * max_pages_per_mr.
 */
439 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
441 struct srp_device *dev = target->srp_host->srp_dev;
443 return srp_create_fr_pool(dev->dev, dev->pd,
444 target->scsi_host->can_queue,
445 dev->max_pages_per_mr);
449 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel.
452 * Change a queue pair into the error state and wait until all receive
453 * completions have been processed before destroying it. This avoids that
454 * the receive completion handler can access the queue pair while it is
/*
 * Implementation notes: the QP attribute block and the marker receive work
 * request are static objects. The marker carries the SRP_LAST_WR_ID work
 * request ID and is posted after the QP entered the error state; ch->done is
 * re-initialized before the marker is posted and waited on before the QP is
 * destroyed.
 */
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
461 struct ib_recv_wr *bad_wr;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected);
/* Move the QP to the error state; the warning names the call that failed. */
467 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
468 WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
472 init_completion(&ch->done);
473 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
474 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
476 wait_for_completion(&ch->done);
479 ib_destroy_qp(ch->qp);
/*
 * srp_create_ch_ib() - create the IB resources of an RDMA channel
 * @ch: SRP RDMA channel.
 *
 * Creates a receive CQ, a send CQ and an RC queue pair, initializes the QP
 * with srp_init_qp() and allocates either a fast registration pool or an
 * FMR pool depending on the device capabilities. The new objects are built
 * in local variables first. Resources the channel held before are destroyed
 * and replaced by the new ones towards the end of the function.
 */
482 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 struct srp_target_port *target = ch->target;
485 struct srp_device *dev = target->srp_host->srp_dev;
486 struct ib_qp_init_attr *init_attr;
487 struct ib_cq *recv_cq, *send_cq;
489 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL;
/*
 * Multiplier for the send CQ and send queue depth; it is larger when fast
 * registration is used, presumably to leave room for the registration and
 * invalidation work requests (TODO confirm).
 */
491 const int m = 1 + dev->use_fast_reg;
492 struct ib_cq_init_attr cq_attr = {};
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
499 /* + 1 for SRP_LAST_WR_ID */
500 cq_attr.cqe = target->queue_size + 1;
501 cq_attr.comp_vector = ch->comp_vector;
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
504 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq);
/* cq_attr is reused for the send CQ with the send-side depth. */
509 cq_attr.cqe = m * target->queue_size;
510 cq_attr.comp_vector = ch->comp_vector;
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
513 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq);
/* Only the receive CQ is armed for completion notification here. */
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
520 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1;
523 init_attr->cap.max_recv_sge = 1;
524 init_attr->cap.max_send_sge = 1;
525 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
526 init_attr->qp_type = IB_QPT_RC;
527 init_attr->send_cq = send_cq;
528 init_attr->recv_cq = recv_cq;
530 qp = ib_create_qp(dev->pd, init_attr);
536 ret = srp_init_qp(target, qp);
/* Fast registration takes precedence over FMR when both flags are set. */
540 if (dev->use_fast_reg) {
541 fr_pool = srp_alloc_fr_pool(target);
542 if (IS_ERR(fr_pool)) {
543 ret = PTR_ERR(fr_pool);
544 shost_printk(KERN_WARNING, target->scsi_host, PFX
545 "FR pool allocation failed (%d)\n", ret);
548 } else if (dev->use_fmr) {
549 fmr_pool = srp_alloc_fmr_pool(target);
550 if (IS_ERR(fmr_pool)) {
551 ret = PTR_ERR(fmr_pool);
552 shost_printk(KERN_WARNING, target->scsi_host, PFX
553 "FMR pool allocation failed (%d)\n", ret);
/* Release the CQs the channel held before, then install the new ones. */
561 ib_destroy_cq(ch->recv_cq);
563 ib_destroy_cq(ch->send_cq);
566 ch->recv_cq = recv_cq;
567 ch->send_cq = send_cq;
/* Likewise swap the registration pool that matches the device mode. */
569 if (dev->use_fast_reg) {
571 srp_destroy_fr_pool(ch->fr_pool);
572 ch->fr_pool = fr_pool;
573 } else if (dev->use_fmr) {
575 ib_destroy_fmr_pool(ch->fmr_pool);
576 ch->fmr_pool = fmr_pool;
/* Unwinding of the newly created (local) CQs. */
586 ib_destroy_cq(send_cq);
589 ib_destroy_cq(recv_cq);
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
/*
 * srp_free_ch_ib() - release the IB resources of an RDMA channel
 * @target: SRP target port the channel belongs to.
 * @ch: SRP RDMA channel.
 *
 * Destroys the CM ID, the registration pool that matches the device mode
 * and both CQs, clears the CQ pointers and frees every IU in the receive
 * and transmit rings.
 */
600 static void srp_free_ch_ib(struct srp_target_port *target,
601 struct srp_rdma_ch *ch)
603 struct srp_device *dev = target->srp_host->srp_dev;
610 ib_destroy_cm_id(ch->cm_id);
614 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
618 if (dev->use_fast_reg) {
620 srp_destroy_fr_pool(ch->fr_pool);
621 } else if (dev->use_fmr) {
623 ib_destroy_fmr_pool(ch->fmr_pool);
626 ib_destroy_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq);
630 * Avoid that the SCSI error handler tries to use this channel after
631 * it has been freed. The SCSI error handler can namely continue
632 * trying to perform recovery actions after scsi_remove_host()
638 ch->send_cq = ch->recv_cq = NULL;
/* Both rings hold target->queue_size information units. */
641 for (i = 0; i < target->queue_size; ++i)
642 srp_free_iu(target->srp_host, ch->rx_ring[i]);
647 for (i = 0; i < target->queue_size; ++i)
648 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * srp_path_rec_completion() - SA path record query callback
 * @status: Status of the path record query.
 * @pathrec: Path record delivered with the callback.
 *
 * The callback context is the SRP RDMA channel that issued the query in
 * srp_lookup_path(). A failed query is reported at error level together
 * with the status code.
 */
654 static void srp_path_rec_completion(int status,
655 struct ib_sa_path_rec *pathrec,
658 struct srp_rdma_ch *ch = ch_ptr;
659 struct srp_target_port *target = ch->target;
663 shost_printk(KERN_ERR, target->scsi_host,
664 PFX "Got failed path rec status %d\n", status);
/*
 * srp_lookup_path() - query the subnet administrator for a path record
 * @ch: SRP RDMA channel whose path member holds the query template.
 *
 * Requests a single path, re-initializes ch->done and starts an
 * asynchronous query through ib_sa_path_rec_get() with
 * srp_path_rec_completion() as the callback. The function then sleeps
 * interruptibly on ch->done.
 *
 * Return: the negative query ID if the query could not be started.
 */
670 static int srp_lookup_path(struct srp_rdma_ch *ch)
672 struct srp_target_port *target = ch->target;
675 ch->path.numb_path = 1;
/* ch->done must be ready before the query callback can run. */
677 init_completion(&ch->done);
679 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680 target->srp_host->srp_dev->dev,
681 target->srp_host->port,
683 IB_SA_PATH_REC_SERVICE_ID |
684 IB_SA_PATH_REC_DGID |
685 IB_SA_PATH_REC_SGID |
686 IB_SA_PATH_REC_NUMB_PATH |
688 SRP_PATH_REC_TIMEOUT_MS,
690 srp_path_rec_completion,
691 ch, &ch->path_query);
692 if (ch->path_query_id < 0)
693 return ch->path_query_id;
695 ret = wait_for_completion_interruptible(&ch->done);
700 shost_printk(KERN_WARNING, target->scsi_host,
701 PFX "Path record query failed\n");
/*
 * srp_send_req() - send a CM REQ carrying an SRP login request
 * @ch: SRP RDMA channel to log in through.
 * @multich: Selects SRP_MULTICHAN_MULTI instead of SRP_MULTICHAN_SINGLE in
 *           the login request flags.
 *
 * A zeroed request object is allocated that combines the CM request
 * parameters (param) with the SRP login request (priv). The login request
 * is attached to the CM request as private data. The port identifiers are
 * laid out according to the I/O class of the target and adjusted for
 * Topspin/Cisco targets before the request is handed to ib_send_cm_req().
 */
706 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
708 struct srp_target_port *target = ch->target;
710 struct ib_cm_req_param param;
711 struct srp_login_req priv;
715 req = kzalloc(sizeof *req, GFP_KERNEL);
719 req->param.primary_path = &ch->path;
720 req->param.alternate_path = NULL;
721 req->param.service_id = target->service_id;
722 req->param.qp_num = ch->qp->qp_num;
723 req->param.qp_type = ch->qp->qp_type;
724 req->param.private_data = &req->priv;
725 req->param.private_data_len = sizeof req->priv;
726 req->param.flow_control = 1;
/* Random starting PSN, truncated to 24 bits by the mask below. */
728 get_random_bytes(&req->param.starting_psn, 4);
729 req->param.starting_psn &= 0xffffff;
732 * Pick some arbitrary defaults here; we could make these
733 * module parameters if anyone cared about setting them.
735 req->param.responder_resources = 4;
736 req->param.remote_cm_response_timeout = 20;
737 req->param.local_cm_response_timeout = 20;
738 req->param.retry_count = target->tl_retry_count;
739 req->param.rnr_retry_count = 7;
740 req->param.max_cm_retries = 15;
742 req->priv.opcode = SRP_LOGIN_REQ;
744 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746 SRP_BUF_FORMAT_INDIRECT);
747 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
748 SRP_MULTICHAN_SINGLE);
750 * In the published SRP specification (draft rev. 16a), the
751 * port identifier format is 8 bytes of ID extension followed
752 * by 8 bytes of GUID. Older drafts put the two halves in the
753 * opposite order, so that the GUID comes first.
755 * Targets conforming to these obsolete drafts can be
756 * recognized by the I/O Class they report.
/* Obsolete layout: GUID in the first 8 bytes, ID extension in the last 8. */
758 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759 memcpy(req->priv.initiator_port_id,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.initiator_port_id + 8,
762 &target->initiator_ext, 8);
763 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
764 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
/* Published layout: ID extension first, GUID second. */
766 memcpy(req->priv.initiator_port_id,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.initiator_port_id + 8,
769 &target->sgid.global.interface_id, 8);
770 memcpy(req->priv.target_port_id, &target->id_ext, 8);
771 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
775 * Topspin/Cisco SRP targets will reject our login unless we
776 * zero out the first 8 bytes of our initiator port ID and set
777 * the second 8 bytes to the local node GUID.
779 if (srp_target_is_topspin(target)) {
780 shost_printk(KERN_DEBUG, target->scsi_host,
781 PFX "Topspin/Cisco initiator port ID workaround "
782 "activated for target GUID %016llx\n",
783 be64_to_cpu(target->ioc_guid));
784 memset(req->priv.initiator_port_id, 0, 8);
785 memcpy(req->priv.initiator_port_id + 8,
786 &target->srp_host->srp_dev->dev->node_guid, 8);
789 status = ib_send_cm_req(ch->cm_id, &req->param);
/*
 * srp_queue_remove_work() - mark a target as removed and schedule removal
 * @target: SRP target port.
 *
 * The state is switched to SRP_TARGET_REMOVED under target->lock, and only
 * if it was not already in that state. The removal work item is queued on
 * srp_remove_wq. The changed flag starts out false and presumably records
 * whether this call performed the state transition (TODO confirm).
 */
796 static bool srp_queue_remove_work(struct srp_target_port *target)
798 bool changed = false;
800 spin_lock_irq(&target->lock);
801 if (target->state != SRP_TARGET_REMOVED) {
802 target->state = SRP_TARGET_REMOVED;
805 spin_unlock_irq(&target->lock);
808 queue_work(srp_remove_wq, &target->remove_work);
/*
 * srp_disconnect_target() - disconnect all channels of a target
 * @target: SRP target port.
 *
 * For each of the target->ch_count channels the connected flag is cleared
 * and, if the channel has a CM ID, a CM DREQ is sent. A failure to send the
 * DREQ is only logged at debug level.
 */
813 static void srp_disconnect_target(struct srp_target_port *target)
815 struct srp_rdma_ch *ch;
818 /* XXX should send SRP_I_LOGOUT request */
820 for (i = 0; i < target->ch_count; i++) {
822 ch->connected = false;
823 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824 shost_printk(KERN_DEBUG, target->scsi_host,
825 PFX "Sending CM DREQ failed\n");
/*
 * srp_free_req_data() - free the per-request resources of a channel
 * @target: SRP target port; supplies the ring size and indirect buffer size.
 * @ch: SRP RDMA channel whose request ring is walked.
 *
 * For every request in the ring the memory registration bookkeeping arrays
 * are freed. The indirect descriptor buffer is unmapped if it has a DMA
 * address and is then freed.
 */
830 static void srp_free_req_data(struct srp_target_port *target,
831 struct srp_rdma_ch *ch)
833 struct srp_device *dev = target->srp_host->srp_dev;
834 struct ib_device *ibdev = dev->dev;
835 struct srp_request *req;
841 for (i = 0; i < target->req_ring_size; ++i) {
842 req = &ch->req_ring[i];
843 if (dev->use_fast_reg) {
846 kfree(req->fmr_list);
847 kfree(req->map_page);
/* A zero DMA address means the indirect buffer was never mapped. */
849 if (req->indirect_dma_addr) {
850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
851 target->indirect_size,
854 kfree(req->indirect_desc);
/*
 * srp_alloc_req_data() - allocate the request ring of a channel
 * @ch: SRP RDMA channel.
 *
 * Allocates a zeroed ring of target->req_ring_size requests. Each request
 * gets an array of target->cmd_sg_cnt pointers that serves as fr_list in
 * fast registration mode and as fmr_list otherwise. In the latter case a
 * page array of max_pages_per_mr entries is allocated as well. Each request
 * also gets an indirect descriptor buffer of target->indirect_size bytes,
 * which is DMA-mapped and whose DMA address is stored in the request.
 *
 * The return code is preset to -ENOMEM.
 */
861 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
863 struct srp_target_port *target = ch->target;
864 struct srp_device *srp_dev = target->srp_host->srp_dev;
865 struct ib_device *ibdev = srp_dev->dev;
866 struct srp_request *req;
869 int i, ret = -ENOMEM;
871 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
876 for (i = 0; i < target->req_ring_size; ++i) {
877 req = &ch->req_ring[i];
878 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
/* The same pointer array backs whichever registration mode is active. */
882 if (srp_dev->use_fast_reg) {
883 req->fr_list = mr_list;
885 req->fmr_list = mr_list;
886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
887 sizeof(void *), GFP_KERNEL);
891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892 if (!req->indirect_desc)
895 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896 target->indirect_size,
898 if (ib_dma_mapping_error(ibdev, dma_addr))
/* Stored only after the mapping was verified. */
901 req->indirect_dma_addr = dma_addr;
910 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911 * @shost: SCSI host whose attributes to remove from sysfs.
913 * Note: Any attributes defined in the host template and that did not exist
914 * before invocation of this function will be ignored.
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
918 struct device_attribute **attr;
/*
 * shost_attrs is walked as a NULL-terminated pointer array; the attr check
 * also covers a host template without an attribute array.
 */
920 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921 device_remove_file(&shost->shost_dev, *attr);
/*
 * srp_remove_target() - tear down an SRP target port
 * @target: SRP target port; expected to be in the SRP_TARGET_REMOVED state.
 *
 * Order of operations: remove the sysfs host attributes, remove the SRP and
 * SCSI hosts, stop the rport timers, disconnect the target, free the IB
 * resources of every channel, cancel pending transport layer error work,
 * free the per-channel request data, unlink the target from the host's
 * target list and finally drop the SCSI host reference. An rport reference
 * is held from before srp_remove_host() until after the error work has been
 * cancelled.
 */
924 static void srp_remove_target(struct srp_target_port *target)
926 struct srp_rdma_ch *ch;
929 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
931 srp_del_scsi_host_attr(target->scsi_host);
932 srp_rport_get(target->rport);
933 srp_remove_host(target->scsi_host);
934 scsi_remove_host(target->scsi_host);
935 srp_stop_rport_timers(target->rport);
936 srp_disconnect_target(target);
937 for (i = 0; i < target->ch_count; i++) {
939 srp_free_ch_ib(target, ch);
941 cancel_work_sync(&target->tl_err_work);
942 srp_rport_put(target->rport);
943 for (i = 0; i < target->ch_count; i++) {
945 srp_free_req_data(target, ch);
/* The host's target list is protected by target_lock. */
950 spin_lock(&target->srp_host->target_lock);
951 list_del(&target->list);
952 spin_unlock(&target->srp_host->target_lock);
954 scsi_host_put(target->scsi_host);
/*
 * srp_remove_work() - work item handler that removes a target
 * @work: The remove_work member embedded in a struct srp_target_port.
 *
 * Recovers the target from the work item and calls srp_remove_target().
 * The target is expected to already be in the SRP_TARGET_REMOVED state.
 */
957 static void srp_remove_work(struct work_struct *work)
959 struct srp_target_port *target =
960 container_of(work, struct srp_target_port, remove_work);
962 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
964 srp_remove_target(target);
/*
 * srp_rport_delete() - schedule removal of the target behind an rport
 * @rport: SRP remote port; its lld_data points at the srp_target_port.
 */
967 static void srp_rport_delete(struct srp_rport *rport)
969 struct srp_target_port *target = rport->lld_data;
971 srp_queue_remove_work(target);
975 * srp_connected_ch() - number of connected channels
976 * @target: SRP target port.
978 static int srp_connected_ch(struct srp_target_port *target)
/* Sum the connected flag over all target->ch_count channels. */
982 for (i = 0; i < target->ch_count; i++)
983 c += target->ch[i].connected;
/*
 * srp_connect_ch() - establish the connection of one RDMA channel
 * @ch: SRP RDMA channel.
 * @multich: Passed on to srp_send_req(); a warning is raised if it is false
 *           while another channel of the target is already connected.
 *
 * Looks up a path, sends the login request and waits interruptibly for the
 * CM handler to complete ch->done. The resulting ch->status then decides
 * how to proceed: a port redirect triggers another path lookup, and a stale
 * connection is reported and turned into -ECONNRESET.
 */
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
990 struct srp_target_port *target = ch->target;
993 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
995 ret = srp_lookup_path(ch);
/* ch->done is reused here; srp_lookup_path() used it for the path query. */
1000 init_completion(&ch->done);
1001 ret = srp_send_req(ch, multich);
1004 ret = wait_for_completion_interruptible(&ch->done);
1009 * The CM event handling code will set status to
1010 * SRP_PORT_REDIRECT if we get a port redirect REJ
1011 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012 * redirect REJ back.
1014 switch (ch->status) {
1016 ch->connected = true;
1019 case SRP_PORT_REDIRECT:
1020 ret = srp_lookup_path(ch);
1025 case SRP_DLID_REDIRECT:
1028 case SRP_STALE_CONN:
1029 shost_printk(KERN_ERR, target->scsi_host, PFX
1030 "giving up on stale connection\n");
1031 ch->status = -ECONNRESET;
/*
 * srp_inv_rkey() - post a local invalidate work request
 * @ch: SRP RDMA channel whose QP receives the work request.
 * @rkey: Remote key to invalidate.
 *
 * Return: the result of ib_post_send().
 */
1040 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1042 struct ib_send_wr *bad_wr;
/* The work request ID is LOCAL_INV_WR_ID_MASK. */
1043 struct ib_send_wr wr = {
1044 .opcode = IB_WR_LOCAL_INV,
1045 .wr_id = LOCAL_INV_WR_ID_MASK,
1049 .ex.invalidate_rkey = rkey,
1052 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * srp_unmap_data() - undo the memory registration and DMA mapping of a command
 * @scmnd: SCSI command whose data buffer was mapped.
 * @ch: SRP RDMA channel the request was issued on.
 * @req: SRP request holding the registration descriptors.
 *
 * In fast registration mode an invalidate work request is posted for the
 * rkey of each of the req->nmdesc descriptors, and the descriptors are
 * returned to the pool. In FMR mode each pool FMR is unmapped. The
 * scatterlist is DMA-unmapped at the end of the function.
 */
1055 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1056 struct srp_rdma_ch *ch,
1057 struct srp_request *req)
1059 struct srp_target_port *target = ch->target;
1060 struct srp_device *dev = target->srp_host->srp_dev;
1061 struct ib_device *ibdev = dev->dev;
/* Tested: presence of a scatterlist and a to-device or from-device direction. */
1064 if (!scsi_sglist(scmnd) ||
1065 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1066 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1069 if (dev->use_fast_reg) {
1070 struct srp_fr_desc **pfr;
1072 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1073 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
/* A failed invalidation is logged and the transport error work is queued. */
1075 shost_printk(KERN_ERR, target->scsi_host, PFX
1076 "Queueing INV WR for rkey %#x failed (%d)\n",
1077 (*pfr)->mr->rkey, res);
1078 queue_work(system_long_wq,
1079 &target->tl_err_work);
1083 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1085 } else if (dev->use_fmr) {
1086 struct ib_pool_fmr **pfmr;
1088 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1089 ib_fmr_pool_unmap(*pfmr);
1092 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1093 scmnd->sc_data_direction);
1097 * srp_claim_req - Take ownership of the scmnd associated with a request.
1098 * @ch: SRP RDMA channel.
1099 * @req: SRP request.
1100 * @sdev: If not NULL, only take ownership for this SCSI device.
1101 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1102 * ownership of @req->scmnd if it equals @scmnd.
1105 * Either NULL or a pointer to the SCSI command the caller became owner of.
/*
 * The @sdev and @scmnd filters are evaluated against req->scmnd with
 * ch->lock held and interrupts disabled.
 */
1107 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1108 struct srp_request *req,
1109 struct scsi_device *sdev,
1110 struct scsi_cmnd *scmnd)
1112 unsigned long flags;
1114 spin_lock_irqsave(&ch->lock, flags);
1116 (!sdev || req->scmnd->device == sdev) &&
1117 (!scmnd || req->scmnd == scmnd)) {
1123 spin_unlock_irqrestore(&ch->lock, flags);
1129 * srp_free_req() - Unmap data and add request to the free request list.
1130 * @ch: SRP RDMA channel.
1131 * @req: Request to be freed.
1132 * @scmnd: SCSI command associated with @req.
1133 * @req_lim_delta: Amount to be added to @target->req_lim.
1135 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1136 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1138 unsigned long flags;
/* Unmapping happens before ch->lock is taken. */
1140 srp_unmap_data(scmnd, ch, req);
/* The request limit updated here is the per-channel ch->req_lim. */
1142 spin_lock_irqsave(&ch->lock, flags);
1143 ch->req_lim += req_lim_delta;
1144 spin_unlock_irqrestore(&ch->lock, flags);
/*
 * srp_finish_req() - complete a request with a given result
 * @ch: SRP RDMA channel.
 * @req: SRP request to finish.
 * @sdev: If not NULL, only finish the request if it belongs to this device.
 * @result: Value stored in scmnd->result before the command is completed.
 *
 * Claims the SCSI command via srp_claim_req(), frees the request without
 * changing the request limit, and completes the command through its
 * scsi_done callback.
 */
1147 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1148 struct scsi_device *sdev, int result)
1150 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1153 srp_free_req(ch, req, scmnd, 0);
1154 scmnd->result = result;
1155 scmnd->scsi_done(scmnd);
/*
 * srp_terminate_io() - fail all outstanding requests of a target
 * @rport: SRP remote port; its lld_data points at the srp_target_port.
 *
 * Every request slot of every channel is finished with the
 * DID_TRANSPORT_FAILFAST host byte.
 */
1159 static void srp_terminate_io(struct srp_rport *rport)
1161 struct srp_target_port *target = rport->lld_data;
1162 struct srp_rdma_ch *ch;
1163 struct Scsi_Host *shost = target->scsi_host;
1164 struct scsi_device *sdev;
1168 * Invoking srp_terminate_io() while srp_queuecommand() is running
1169 * is not safe. Hence the warning statement below.
1171 shost_for_each_device(sdev, shost)
1172 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1174 for (i = 0; i < target->ch_count; i++) {
1175 ch = &target->ch[i];
1177 for (j = 0; j < target->req_ring_size; ++j) {
1178 struct srp_request *req = &ch->req_ring[j];
/* A NULL sdev means the request is finished regardless of its device. */
1180 srp_finish_req(ch, req, NULL,
1181 DID_TRANSPORT_FAILFAST << 16);
1187 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1188 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1189 * srp_reset_device() or srp_reset_host() calls will occur while this function
1190 * is in progress. One way to realize that is not to call this function
1191 * directly but to call srp_reconnect_rport() instead since that last function
1192 * serializes calls of this function via rport->mutex and also blocks
1193 * srp_queuecommand() calls before invoking this function.
/*
 * Sequence: disconnect the target, obtain a new CM ID for every channel,
 * finish every outstanding request with DID_RESET, recreate the IB
 * resources of every channel and rebuild its free_tx list, clear
 * qp_in_error and reconnect the channels. The return values of the CM ID
 * and IB resource creation steps are accumulated into ret with +=.
 */
1195 static int srp_rport_reconnect(struct srp_rport *rport)
1197 struct srp_target_port *target = rport->lld_data;
1198 struct srp_rdma_ch *ch;
1200 bool multich = false;
1202 srp_disconnect_target(target);
1204 if (target->state == SRP_TARGET_SCANNING)
1208 * Now get a new local CM ID so that we avoid confusing the target in
1209 * case things are really fouled up. Doing so also ensures that all CM
1210 * callbacks will have finished before a new QP is allocated.
1212 for (i = 0; i < target->ch_count; i++) {
1213 ch = &target->ch[i];
1214 ret += srp_new_cm_id(ch);
1216 for (i = 0; i < target->ch_count; i++) {
1217 ch = &target->ch[i];
1218 for (j = 0; j < target->req_ring_size; ++j) {
1219 struct srp_request *req = &ch->req_ring[j];
1221 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1224 for (i = 0; i < target->ch_count; i++) {
1225 ch = &target->ch[i];
1227 * Whether or not creating a new CM ID succeeded, create a new
1228 * QP. This guarantees that all completion callback function
1229 * invocations have finished before request resetting starts.
1231 ret += srp_create_ch_ib(ch);
/* Rebuild the list of free transmit IUs from the whole tx ring. */
1233 INIT_LIST_HEAD(&ch->free_tx);
1234 for (j = 0; j < target->queue_size; ++j)
1235 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1238 target->qp_in_error = false;
1240 for (i = 0; i < target->ch_count; i++) {
1241 ch = &target->ch[i];
1244 ret = srp_connect_ch(ch, multich);
1249 shost_printk(KERN_INFO, target->scsi_host,
1250 PFX "reconnect succeeded\n");
/*
 * srp_map_desc() - fill in the current direct buffer descriptor
 * @state: Mapping state; state->desc is the descriptor to fill in.
 * @dma_addr: DMA address of the memory region.
 * @dma_len: Length of the region in bytes; a zero length triggers a warning.
 * @rkey: Remote key that grants access to the region.
 *
 * The address, key and length are stored in big-endian form, and the length
 * is added to state->total_len.
 */
1255 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1256 unsigned int dma_len, u32 rkey)
1258 struct srp_direct_buf *desc = state->desc;
1260 WARN_ON_ONCE(!dma_len);
1262 desc->va = cpu_to_be64(dma_addr);
1263 desc->key = cpu_to_be32(rkey);
1264 desc->len = cpu_to_be32(dma_len);
1266 state->total_len += dma_len;
/*
 * srp_map_finish_fmr() - register the pages collected so far through an FMR
 * @state: Mapping state holding the page array, page count and DMA length.
 * @ch: SRP RDMA channel whose FMR pool is used.
 *
 * A single page is described directly with the global memory region key
 * when target->global_mr is set. Otherwise the collected pages are mapped
 * with ib_fmr_pool_map_phys(), the FMR is recorded in the request's FMR
 * list and a descriptor is emitted with the FMR rkey.
 *
 * Return: PTR_ERR() of the FMR if the pool mapping failed.
 */
1271 static int srp_map_finish_fmr(struct srp_map_state *state,
1272 struct srp_rdma_ch *ch)
1274 struct srp_target_port *target = ch->target;
1275 struct srp_device *dev = target->srp_host->srp_dev;
1276 struct ib_pool_fmr *fmr;
/* Checks whether the FMR pointer array of the request is exhausted. */
1279 if (state->fmr.next >= state->fmr.end)
1282 WARN_ON_ONCE(!dev->use_fmr);
1284 if (state->npages == 0)
1287 if (state->npages == 1 && target->global_mr) {
1288 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1289 target->global_mr->rkey);
1293 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1294 state->npages, io_addr);
1296 return PTR_ERR(fmr);
1298 *state->fmr.next++ = fmr;
/* The descriptor address is the offset of the data within its first page. */
1301 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1302 state->dma_len, fmr->fmr->rkey);
/*
 * srp_map_finish_fr() - register scatterlist entries by fast registration
 * @state: Mapping state holding the scatterlist position and FR list.
 * @ch: SRP RDMA channel whose FR pool and QP are used.
 *
 * A single scatterlist entry is described directly with the global memory
 * region key when target->global_mr is set. Otherwise a descriptor is taken
 * from the FR pool, its rkey is advanced, the scatterlist is mapped onto
 * the memory region with ib_map_mr_sg() and an IB_WR_REG_MR work request is
 * posted on the channel QP.
 */
1311 static int srp_map_finish_fr(struct srp_map_state *state,
1312 struct srp_rdma_ch *ch)
1314 struct srp_target_port *target = ch->target;
1315 struct srp_device *dev = target->srp_host->srp_dev;
1316 struct ib_send_wr *bad_wr;
1317 struct ib_reg_wr wr;
1318 struct srp_fr_desc *desc;
/* Checks whether the FR pointer array of the request is exhausted. */
1322 if (state->fr.next >= state->fr.end)
1325 WARN_ON_ONCE(!dev->use_fast_reg);
1327 if (state->sg_nents == 0)
1330 if (state->sg_nents == 1 && target->global_mr) {
1331 srp_map_desc(state, sg_dma_address(state->sg),
1332 sg_dma_len(state->sg),
1333 target->global_mr->rkey);
1337 desc = srp_fr_pool_get(ch->fr_pool);
/* Advance the rkey before the memory region is registered again. */
1341 rkey = ib_inc_rkey(desc->mr->rkey);
1342 ib_update_fast_reg_key(desc->mr, rkey);
1344 n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
1346 if (unlikely(n < 0))
1350 wr.wr.opcode = IB_WR_REG_MR;
1351 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1353 wr.wr.send_flags = 0;
1355 wr.key = desc->mr->rkey;
1356 wr.access = (IB_ACCESS_LOCAL_WRITE |
1357 IB_ACCESS_REMOTE_READ |
1358 IB_ACCESS_REMOTE_WRITE);
/* Record the descriptor in the request before the work request is posted. */
1360 *state->fr.next++ = desc;
1363 srp_map_desc(state, desc->mr->iova,
1364 desc->mr->length, desc->mr->rkey);
1366 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
/*
 * srp_map_sg_entry() - add one DMA-mapped sg entry to the FMR page list.
 *
 * Reads the DMA address and length of @sg and warns once on a zero length.
 * The pending pages are flushed through srp_map_finish_fmr() when the page
 * array already holds max_pages_per_mr entries or when the address is not
 * aligned to the MR page size. Each chunk is at most the remainder of the
 * current MR page; its page-aligned address is appended to state->pages and
 * its length is added to state->dma_len. A final chunk shorter than a full
 * MR page triggers another flush.
 */
1373 static int srp_map_sg_entry(struct srp_map_state *state,
1374 struct srp_rdma_ch *ch,
1375 struct scatterlist *sg, int sg_index)
1377 struct srp_target_port *target = ch->target;
1378 struct srp_device *dev = target->srp_host->srp_dev;
1379 struct ib_device *ibdev = dev->dev;
1380 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1381 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1382 unsigned int len = 0;
1385 WARN_ON_ONCE(!dma_len);
1388 unsigned offset = dma_addr & ~dev->mr_page_mask;
1389 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1390 ret = srp_map_finish_fmr(state, ch);
1395 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1398 state->base_dma_addr = dma_addr;
1399 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1400 state->dma_len += len;
1406 * If the last entry of the MR wasn't a full page, then we need to
1407 * close it out and start a new one -- we can only merge at page
1411 if (len != dev->mr_page_size)
1412 ret = srp_map_finish_fmr(state, ch);
/*
 * srp_map_sg_fmr() - map a scatterlist using FMRs.
 *
 * Points @state at the indirect descriptor table, page array and FMR list
 * of @req (the FMR list is bounded by target->cmd_sg_cnt entries), feeds
 * every sg entry to srp_map_sg_entry(), flushes what is left with
 * srp_map_finish_fmr() and copies state->nmdesc into req->nmdesc.
 */
1416 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1417 struct srp_request *req, struct scatterlist *scat,
1420 struct scatterlist *sg;
1423 state->desc = req->indirect_desc;
1424 state->pages = req->map_page;
1425 state->fmr.next = req->fmr_list;
1426 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1428 for_each_sg(scat, sg, count, i) {
1429 ret = srp_map_sg_entry(state, ch, sg, i);
1434 ret = srp_map_finish_fmr(state, ch);
1438 req->nmdesc = state->nmdesc;
/*
 * srp_map_sg_fr() - map a scatterlist using fast registration.
 *
 * Points @state at the indirect descriptor table and FR descriptor list of
 * @req (bounded by target->cmd_sg_cnt entries) and takes the entry count
 * from scsi_sg_count(req->scmnd). While entries remain it calls
 * srp_map_finish_fr(); a non-negative result n is subtracted from
 * state->sg_nents and state->sg is advanced by n entries. Finally
 * state->nmdesc is copied into req->nmdesc.
 */
1443 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1444 struct srp_request *req, struct scatterlist *scat,
1447 state->desc = req->indirect_desc;
1448 state->fr.next = req->fr_list;
1449 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1451 state->sg_nents = scsi_sg_count(req->scmnd);
1453 while (state->sg_nents) {
1456 n = srp_map_finish_fr(state, ch);
1457 if (unlikely(n < 0))
1460 state->sg_nents -= n;
1461 for (i = 0; i < n; i++)
1462 state->sg = sg_next(state->sg);
1465 req->nmdesc = state->nmdesc;
/*
 * srp_map_sg_dma() - describe a scatterlist without memory registration.
 *
 * Adds one descriptor per sg entry to the indirect descriptor table of
 * @req, each using the DMA address and length of the entry together with
 * the rkey of the global MR, then copies state->nmdesc into req->nmdesc.
 */
1470 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1471 struct srp_request *req, struct scatterlist *scat,
1474 struct srp_target_port *target = ch->target;
1475 struct srp_device *dev = target->srp_host->srp_dev;
1476 struct scatterlist *sg;
1479 state->desc = req->indirect_desc;
1480 for_each_sg(scat, sg, count, i) {
1481 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1482 ib_sg_dma_len(dev->dev, sg),
1483 target->global_mr->rkey);
1486 req->nmdesc = state->nmdesc;
/*
 * srp_map_idb() - register the indirect descriptor table of @req.
 *
 * Builds a temporary map state around a local descriptor (idb_desc) that
 * covers idb_len bytes at req->indirect_dma_addr, with next_mr and end_mr
 * as the bounds of the MR list. With fast registration a one-entry
 * scatterlist is set up over req->indirect_desc and handed to
 * srp_map_finish_fr(); with FMR a one-entry page array is handed to
 * srp_map_finish_fmr(). The key stored in idb_desc is returned through
 * idb_rkey; srp_map_desc() writes that field with cpu_to_be32(), so the
 * returned key is big-endian.
 */
1492 * Register the indirect data buffer descriptor with the HCA.
1494 * Note: since the indirect data buffer descriptor has been allocated with
1495 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1498 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1499 void **next_mr, void **end_mr, u32 idb_len,
1502 struct srp_target_port *target = ch->target;
1503 struct srp_device *dev = target->srp_host->srp_dev;
1504 struct srp_map_state state;
1505 struct srp_direct_buf idb_desc;
1507 struct scatterlist idb_sg[1];
1510 memset(&state, 0, sizeof(state));
1511 memset(&idb_desc, 0, sizeof(idb_desc));
1512 state.gen.next = next_mr;
1513 state.gen.end = end_mr;
1514 state.desc = &idb_desc;
1515 state.base_dma_addr = req->indirect_dma_addr;
1516 state.dma_len = idb_len;
1518 if (dev->use_fast_reg) {
1521 sg_set_buf(idb_sg, req->indirect_desc, idb_len);
1522 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1523 ret = srp_map_finish_fr(&state, ch);
1526 } else if (dev->use_fmr) {
1527 state.pages = idb_pages;
1528 state.pages[0] = (req->indirect_dma_addr &
1531 ret = srp_map_finish_fmr(&state, ch);
1538 *idb_rkey = idb_desc.key;
1543 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1544 struct srp_request *req)
1546 struct srp_target_port *target = ch->target;
1547 struct scatterlist *scat;
1548 struct srp_cmd *cmd = req->cmd->buf;
1549 int len, nents, count, ret;
1550 struct srp_device *dev;
1551 struct ib_device *ibdev;
1552 struct srp_map_state state;
1553 struct srp_indirect_buf *indirect_hdr;
1554 u32 idb_len, table_len;
1558 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1559 return sizeof (struct srp_cmd);
1561 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1562 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1563 shost_printk(KERN_WARNING, target->scsi_host,
1564 PFX "Unhandled data direction %d\n",
1565 scmnd->sc_data_direction);
1569 nents = scsi_sg_count(scmnd);
1570 scat = scsi_sglist(scmnd);
1572 dev = target->srp_host->srp_dev;
1575 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1576 if (unlikely(count == 0))
1579 fmt = SRP_DATA_DESC_DIRECT;
1580 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1582 if (count == 1 && target->global_mr) {
1584 * The midlayer only generated a single gather/scatter
1585 * entry, or DMA mapping coalesced everything to a
1586 * single entry. So a direct descriptor along with
1587 * the DMA MR suffices.
1589 struct srp_direct_buf *buf = (void *) cmd->add_data;
1591 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1592 buf->key = cpu_to_be32(target->global_mr->rkey);
1593 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1600 * We have more than one scatter/gather entry, so build our indirect
1601 * descriptor table, trying to merge as many entries as we can.
1603 indirect_hdr = (void *) cmd->add_data;
1605 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1606 target->indirect_size, DMA_TO_DEVICE);
1608 memset(&state, 0, sizeof(state));
1609 if (dev->use_fast_reg)
1610 srp_map_sg_fr(&state, ch, req, scat, count);
1611 else if (dev->use_fmr)
1612 srp_map_sg_fmr(&state, ch, req, scat, count);
1614 srp_map_sg_dma(&state, ch, req, scat, count);
1616 /* We've mapped the request, now pull as much of the indirect
1617 * descriptor table as we can into the command buffer. If this
1618 * target is not using an external indirect table, we are
1619 * guaranteed to fit into the command, as the SCSI layer won't
1620 * give us more S/G entries than we allow.
1622 if (state.ndesc == 1) {
1624 * Memory registration collapsed the sg-list into one entry,
1625 * so use a direct descriptor.
1627 struct srp_direct_buf *buf = (void *) cmd->add_data;
1629 *buf = req->indirect_desc[0];
1633 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1634 !target->allow_ext_sg)) {
1635 shost_printk(KERN_ERR, target->scsi_host,
1636 "Could not fit S/G list into SRP_CMD\n");
1640 count = min(state.ndesc, target->cmd_sg_cnt);
1641 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1642 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1644 fmt = SRP_DATA_DESC_INDIRECT;
1645 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1646 len += count * sizeof (struct srp_direct_buf);
1648 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1649 count * sizeof (struct srp_direct_buf));
1651 if (!target->global_mr) {
1652 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1653 idb_len, &idb_rkey);
1658 idb_rkey = target->global_mr->rkey;
1661 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1662 indirect_hdr->table_desc.key = idb_rkey;
1663 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1664 indirect_hdr->len = cpu_to_be32(state.total_len);
1666 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1667 cmd->data_out_desc_cnt = count;
1669 cmd->data_in_desc_cnt = count;
1671 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1675 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1676 cmd->buf_fmt = fmt << 4;
/*
 * srp_put_tx_iu() - give a transmit IU back to the channel.
 *
 * Takes ch->lock with interrupts saved and adds @iu to ch->free_tx. For IU
 * types other than SRP_IU_RSP an additional step follows the type check;
 * the existing comment below describes it as returning a credit.
 */
1684 * Return an IU and possible credit to the free pool
1686 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1687 enum srp_iu_type iu_type)
1689 unsigned long flags;
1691 spin_lock_irqsave(&ch->lock, flags);
1692 list_add(&iu->list, &ch->free_tx);
1693 if (iu_type != SRP_IU_RSP)
1695 spin_unlock_irqrestore(&ch->lock, flags);
/*
 * __srp_get_tx_iu() - take a transmit IU from ch->free_tx.
 *
 * The reserve rsv is zero for task management IUs and SRP_TSK_MGMT_SQ_SIZE
 * for every other type. The function first reaps send completions through
 * srp_send_completion() and then checks whether ch->free_tx is empty. For
 * IU types other than SRP_IU_RSP it compares ch->req_lim against rsv and
 * increments target->zero_req_lim when the limit does not exceed the
 * reserve. The IU handed out is the first entry of ch->free_tx, which is
 * removed from the list.
 */
1699 * Must be called with ch->lock held to protect req_lim and free_tx.
1700 * If IU is not sent, it must be returned using srp_put_tx_iu().
1703 * An upper limit for the number of allocated information units for each
1705 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1706 * more than Scsi_Host.can_queue requests.
1707 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1708 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1709 * one unanswered SRP request to an initiator.
1711 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1712 enum srp_iu_type iu_type)
1714 struct srp_target_port *target = ch->target;
1715 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1718 srp_send_completion(ch->send_cq, ch);
1720 if (list_empty(&ch->free_tx))
1723 /* Initiator responses to target requests do not consume credits */
1724 if (iu_type != SRP_IU_RSP) {
1725 if (ch->req_lim <= rsv) {
1726 ++target->zero_req_lim;
1733 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1734 list_del(&iu->list);
/*
 * srp_post_send() - post a signaled IB_WR_SEND for @iu on ch->qp.
 *
 * The scatter/gather element uses iu->dma and target->lkey, and the IU
 * pointer is stored in wr_id. Returns the result of ib_post_send().
 */
1738 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1740 struct srp_target_port *target = ch->target;
1742 struct ib_send_wr wr, *bad_wr;
1744 list.addr = iu->dma;
1746 list.lkey = target->lkey;
1749 wr.wr_id = (uintptr_t) iu;
1752 wr.opcode = IB_WR_SEND;
1753 wr.send_flags = IB_SEND_SIGNALED;
1755 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * srp_post_recv() - post a receive work request for @iu on ch->qp.
 *
 * The scatter/gather element covers iu->size bytes at iu->dma using
 * target->lkey, and the IU pointer is stored in wr_id. Returns the result
 * of ib_post_recv().
 */
1758 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1760 struct srp_target_port *target = ch->target;
1761 struct ib_recv_wr wr, *bad_wr;
1764 list.addr = iu->dma;
1765 list.length = iu->size;
1766 list.lkey = target->lkey;
1769 wr.wr_id = (uintptr_t) iu;
1773 return ib_post_recv(ch->qp, &wr, &bad_wr);
/*
 * srp_process_rsp() - handle an SRP_RSP information unit.
 *
 * A response whose tag carries SRP_TAG_TSK_MGMT answers a task management
 * request: req_lim_delta is added to ch->req_lim under ch->lock,
 * tsk_mgmt_status is set to -1 or, when at least four bytes of response
 * data are present, to the fourth data byte, and tsk_mgmt_done is completed.
 *
 * Any other response is matched to a SCSI command with
 * scsi_host_find_tag() and claimed with srp_claim_req(). When no command is
 * found an error is logged and only the credit delta is applied. Otherwise
 * the status, the optional sense data and the residual count are copied
 * into the command, the request is released with srp_free_req() and the
 * command is completed through scsi_done().
 *
 * NOTE(review): the sense copy length is min_t(int, sense_data_len,
 * SCSI_SENSE_BUFFERSIZE). A wire value above INT_MAX would turn negative
 * under the int cast and win the min; presumably the target is trusted not
 * to send such a value - confirm.
 */
1776 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1778 struct srp_target_port *target = ch->target;
1779 struct srp_request *req;
1780 struct scsi_cmnd *scmnd;
1781 unsigned long flags;
1783 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1784 spin_lock_irqsave(&ch->lock, flags);
1785 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1786 spin_unlock_irqrestore(&ch->lock, flags);
1788 ch->tsk_mgmt_status = -1;
1789 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1790 ch->tsk_mgmt_status = rsp->data[3];
1791 complete(&ch->tsk_mgmt_done);
1793 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1795 req = (void *)scmnd->host_scribble;
1796 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1799 shost_printk(KERN_ERR, target->scsi_host,
1800 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1801 rsp->tag, ch - target->ch, ch->qp->qp_num);
1803 spin_lock_irqsave(&ch->lock, flags);
1804 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1805 spin_unlock_irqrestore(&ch->lock, flags);
1809 scmnd->result = rsp->status;
1811 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1812 memcpy(scmnd->sense_buffer, rsp->data +
1813 be32_to_cpu(rsp->resp_data_len),
1814 min_t(int, be32_to_cpu(rsp->sense_data_len),
1815 SCSI_SENSE_BUFFERSIZE));
1818 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1819 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1820 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1821 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1822 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1823 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1824 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1825 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1827 srp_free_req(ch, req, scmnd,
1828 be32_to_cpu(rsp->req_lim_delta));
1830 scmnd->host_scribble = NULL;
1831 scmnd->scsi_done(scmnd);
/*
 * srp_response_common() - send an initiator response to a target request.
 *
 * Under ch->lock the credit delta is added to ch->req_lim and an
 * SRP_IU_RSP information unit is fetched with __srp_get_tx_iu(). When no IU
 * is available an error is logged. Otherwise len bytes of the response are
 * copied into the IU buffer, bracketed by DMA sync calls, and the IU is
 * posted with srp_post_send(). If posting fails the error is logged and
 * the IU is returned with srp_put_tx_iu().
 */
1835 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1838 struct srp_target_port *target = ch->target;
1839 struct ib_device *dev = target->srp_host->srp_dev->dev;
1840 unsigned long flags;
1844 spin_lock_irqsave(&ch->lock, flags);
1845 ch->req_lim += req_delta;
1846 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1847 spin_unlock_irqrestore(&ch->lock, flags);
1850 shost_printk(KERN_ERR, target->scsi_host, PFX
1851 "no IU available to send response\n");
1855 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1856 memcpy(iu->buf, rsp, len);
1857 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1859 err = srp_post_send(ch, iu, len);
1861 shost_printk(KERN_ERR, target->scsi_host, PFX
1862 "unable to post response: %d\n", err);
1863 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
/*
 * srp_process_cred_req() - answer an SRP_CRED_REQ from the target.
 *
 * Builds an SRP_CRED_RSP, passes it together with the credit delta of the
 * request to srp_response_common() and logs an error if that call fails.
 */
1869 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1870 struct srp_cred_req *req)
1872 struct srp_cred_rsp rsp = {
1873 .opcode = SRP_CRED_RSP,
1876 s32 delta = be32_to_cpu(req->req_lim_delta);
1878 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1879 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1880 "problems processing SRP_CRED_REQ\n");
/*
 * srp_process_aer_req() - answer an SRP_AER_REQ from the target.
 *
 * The asynchronous event itself is ignored apart from a log message naming
 * the LUN. An SRP_AER_RSP is sent through srp_response_common() with the
 * credit delta of the request, and a failure of that call is logged.
 */
1883 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1884 struct srp_aer_req *req)
1886 struct srp_target_port *target = ch->target;
1887 struct srp_aer_rsp rsp = {
1888 .opcode = SRP_AER_RSP,
1891 s32 delta = be32_to_cpu(req->req_lim_delta);
1893 shost_printk(KERN_ERR, target->scsi_host, PFX
1894 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1896 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1897 shost_printk(KERN_ERR, target->scsi_host, PFX
1898 "problems processing SRP_AER_REQ\n");
/*
 * srp_handle_recv() - process one successful receive completion.
 *
 * Recovers the IU from wc->wr_id, syncs its buffer for CPU access and reads
 * the SRP opcode from the first byte. Responses, credit requests and
 * asynchronous event requests are dispatched to srp_process_rsp(),
 * srp_process_cred_req() and srp_process_aer_req(); a target logout
 * request and unknown opcodes are only logged. Afterwards the buffer is
 * synced back to the device and the IU is reposted with srp_post_recv(),
 * logging an error if reposting fails.
 */
1901 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1903 struct srp_target_port *target = ch->target;
1904 struct ib_device *dev = target->srp_host->srp_dev->dev;
1905 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1909 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1912 opcode = *(u8 *) iu->buf;
1915 shost_printk(KERN_ERR, target->scsi_host,
1916 PFX "recv completion, opcode 0x%02x\n", opcode);
1917 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1918 iu->buf, wc->byte_len, true);
1923 srp_process_rsp(ch, iu->buf);
1927 srp_process_cred_req(ch, iu->buf);
1931 srp_process_aer_req(ch, iu->buf);
1935 /* XXX Handle target logout */
1936 shost_printk(KERN_WARNING, target->scsi_host,
1937 PFX "Got target logout request\n");
1941 shost_printk(KERN_WARNING, target->scsi_host,
1942 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1946 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1949 res = srp_post_recv(ch, iu);
1951 shost_printk(KERN_ERR, target->scsi_host,
1952 PFX "Recv failed with error code %d\n", res);
/*
 * Work function behind target->tl_err_work: recovers the target port from
 * the embedded work structure and starts the transport layer failure
 * timers of its rport.
 */
1956 * srp_tl_err_work() - handle a transport layer error
1957 * @work: Work structure embedded in an SRP target port.
1959 * Note: This function may get invoked before the rport has been created,
1960 * hence the target->rport test.
1962 static void srp_tl_err_work(struct work_struct *work)
1964 struct srp_target_port *target;
1966 target = container_of(work, struct srp_target_port, tl_err_work);
1968 srp_start_tl_fail_timers(target->rport);
/*
 * srp_handle_qp_err() - handle a failed work completion.
 *
 * A completion carrying SRP_LAST_WR_ID only completes ch->done. Otherwise,
 * while the channel is connected and no QP error has been recorded yet, the
 * failure is logged according to its kind (local invalidate, fast
 * registration, or plain send/receive as selected by @send_err) and
 * target->tl_err_work is queued on system_long_wq. target->qp_in_error is
 * then set.
 */
1971 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1972 bool send_err, struct srp_rdma_ch *ch)
1974 struct srp_target_port *target = ch->target;
1976 if (wr_id == SRP_LAST_WR_ID) {
1977 complete(&ch->done);
1981 if (ch->connected && !target->qp_in_error) {
1982 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1983 shost_printk(KERN_ERR, target->scsi_host, PFX
1984 "LOCAL_INV failed with status %s (%d)\n",
1985 ib_wc_status_msg(wc_status), wc_status);
1986 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1987 shost_printk(KERN_ERR, target->scsi_host, PFX
1988 "FAST_REG_MR failed status %s (%d)\n",
1989 ib_wc_status_msg(wc_status), wc_status);
1991 shost_printk(KERN_ERR, target->scsi_host,
1992 PFX "failed %s status %s (%d) for iu %p\n",
1993 send_err ? "send" : "receive",
1994 ib_wc_status_msg(wc_status), wc_status,
1995 (void *)(uintptr_t)wr_id);
1997 queue_work(system_long_wq, &target->tl_err_work);
1999 target->qp_in_error = true;
/*
 * srp_recv_completion() - receive CQ callback.
 *
 * Requests notification for the next completion and then polls the CQ one
 * entry at a time. Successful completions go to srp_handle_recv(), failed
 * ones to srp_handle_qp_err() flagged as receive errors.
 */
2002 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2004 struct srp_rdma_ch *ch = ch_ptr;
2007 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2008 while (ib_poll_cq(cq, 1, &wc) > 0) {
2009 if (likely(wc.status == IB_WC_SUCCESS)) {
2010 srp_handle_recv(ch, &wc);
2012 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
/*
 * srp_send_completion() - reap send completions.
 *
 * Polls the CQ one entry at a time. The IU of every successful completion
 * is recovered from wr_id and put back on ch->free_tx; failed completions
 * go to srp_handle_qp_err() flagged as send errors.
 */
2017 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2019 struct srp_rdma_ch *ch = ch_ptr;
2023 while (ib_poll_cq(cq, 1, &wc) > 0) {
2024 if (likely(wc.status == IB_WC_SUCCESS)) {
2025 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2026 list_add(&iu->list, &ch->free_tx);
2028 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
/*
 * srp_queuecommand() - queue a SCSI command for transmission as SRP_CMD.
 *
 * When called from the SCSI error handler thread the rport mutex is held
 * for the duration of the call. The command is rejected when
 * srp_chkready() reports a non-zero result. The block layer unique tag
 * selects both the RDMA channel (hardware queue part) and the slot in
 * ch->req_ring (tag part). A transmit IU is taken under ch->lock, the
 * SRP_CMD header is zeroed and filled with opcode, LUN and CDB, the data
 * buffer is mapped with srp_map_data() and the IU is posted with
 * srp_post_send().
 *
 * A mapping failure is logged and translated into the SCSI result: QUEUE_FULL
 * status for -ENOMEM, DID_ERROR otherwise. The error paths unmap the data,
 * return the IU with srp_put_tx_iu(), complete the command via scsi_done()
 * when a result has been set, and use SCSI_MLQUEUE_HOST_BUSY as a return
 * value.
 */
2033 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2035 struct srp_target_port *target = host_to_target(shost);
2036 struct srp_rport *rport = target->rport;
2037 struct srp_rdma_ch *ch;
2038 struct srp_request *req;
2040 struct srp_cmd *cmd;
2041 struct ib_device *dev;
2042 unsigned long flags;
2046 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2049 * The SCSI EH thread is the only context from which srp_queuecommand()
2050 * can get invoked for blocked devices (SDEV_BLOCK /
2051 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2052 * locking the rport mutex if invoked from inside the SCSI EH.
2055 mutex_lock(&rport->mutex);
2057 scmnd->result = srp_chkready(target->rport);
2058 if (unlikely(scmnd->result))
2061 WARN_ON_ONCE(scmnd->request->tag < 0);
2062 tag = blk_mq_unique_tag(scmnd->request);
2063 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2064 idx = blk_mq_unique_tag_to_tag(tag);
2065 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2066 dev_name(&shost->shost_gendev), tag, idx,
2067 target->req_ring_size);
2069 spin_lock_irqsave(&ch->lock, flags);
2070 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2071 spin_unlock_irqrestore(&ch->lock, flags);
2076 req = &ch->req_ring[idx];
2077 dev = target->srp_host->srp_dev->dev;
2078 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2081 scmnd->host_scribble = (void *) req;
2084 memset(cmd, 0, sizeof *cmd);
2086 cmd->opcode = SRP_CMD;
2087 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2089 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2094 len = srp_map_data(scmnd, ch, req);
2096 shost_printk(KERN_ERR, target->scsi_host,
2097 PFX "Failed to map data (%d)\n", len);
2099 * If we ran out of memory descriptors (-ENOMEM) because an
2100 * application is queuing many requests with more than
2101 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2102 * to reduce queue depth temporarily.
2104 scmnd->result = len == -ENOMEM ?
2105 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2109 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2112 if (srp_post_send(ch, iu, len)) {
2113 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2121 mutex_unlock(&rport->mutex);
2126 srp_unmap_data(scmnd, ch, req);
2129 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2132 * Avoid that the loops that iterate over the request ring can
2133 * encounter a dangling SCSI command pointer.
2138 if (scmnd->result) {
2139 scmnd->scsi_done(scmnd);
2142 ret = SCSI_MLQUEUE_HOST_BUSY;
/*
 * srp_alloc_iu_bufs() - allocate the receive and transmit IU rings of @ch.
 *
 * Both rings are arrays of target->queue_size pointers obtained with
 * kcalloc(). Every receive slot gets an IU mapped DMA_FROM_DEVICE and every
 * transmit slot an IU mapped DMA_TO_DEVICE; each transmit IU is also added
 * to ch->free_tx. The cleanup loop frees the IUs of both rings slot by slot.
 */
2149 * Note: the resources allocated in this function are freed in
2152 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2154 struct srp_target_port *target = ch->target;
2157 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2161 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2166 for (i = 0; i < target->queue_size; ++i) {
2167 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2169 GFP_KERNEL, DMA_FROM_DEVICE);
2170 if (!ch->rx_ring[i])
2174 for (i = 0; i < target->queue_size; ++i) {
2175 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2177 GFP_KERNEL, DMA_TO_DEVICE);
2178 if (!ch->tx_ring[i])
2181 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2187 for (i = 0; i < target->queue_size; ++i) {
2188 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2189 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * srp_compute_rq_tmo() - derive a request timeout from RC QP attributes.
 *
 * Warns once unless both IB_QP_TIMEOUT and IB_QP_RETRY_CNT are set in
 * @attr_mask. The transport timer is computed as 4096 * 2^timeout
 * nanoseconds, the maximum completion time as retry_cnt * 4 times that
 * value converted to milliseconds, and the result is that time plus 1000 ms
 * expressed in jiffies.
 */
2202 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2204 uint64_t T_tr_ns, max_compl_time_ms;
2205 uint32_t rq_tmo_jiffies;
2208 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2209 * table 91), both the QP timeout and the retry count have to be set
2210 * for RC QP's during the RTR to RTS transition.
2212 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2213 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2216 * Set target->rq_tmo_jiffies to one second more than the largest time
2217 * it can take before an error completion is generated. See also
2218 * C9-140..142 in the IBTA spec for more information about how to
2219 * convert the QP Local ACK Timeout value to nanoseconds.
2221 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2222 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2223 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2224 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2226 return rq_tmo_jiffies;
/*
 * srp_cm_rep_handler() - handle a CM REP carrying the SRP login response.
 *
 * For an SRP_LOGIN_RSP the maximum target-to-initiator IU length and the
 * initial request limit are taken from the response, can_queue is capped at
 * the request limit minus SRP_TSK_MGMT_SQ_SIZE and cmd_per_lun is capped at
 * can_queue; any other opcode is logged as unhandled. The function then
 * allocates the IU rings, moves the QP to RTR using attributes from
 * ib_cm_init_qp_attr(), posts all receive IUs, moves the QP to RTS, stores
 * the timeout computed by srp_compute_rq_tmo() in target->rq_tmo_jiffies
 * and sends the CM RTU.
 */
2229 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2230 const struct srp_login_rsp *lrsp,
2231 struct srp_rdma_ch *ch)
2233 struct srp_target_port *target = ch->target;
2234 struct ib_qp_attr *qp_attr = NULL;
2239 if (lrsp->opcode == SRP_LOGIN_RSP) {
2240 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2241 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2244 * Reserve credits for task management so we don't
2245 * bounce requests back to the SCSI mid-layer.
2247 target->scsi_host->can_queue
2248 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2249 target->scsi_host->can_queue);
2250 target->scsi_host->cmd_per_lun
2251 = min_t(int, target->scsi_host->can_queue,
2252 target->scsi_host->cmd_per_lun);
2254 shost_printk(KERN_WARNING, target->scsi_host,
2255 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2261 ret = srp_alloc_iu_bufs(ch);
2267 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2271 qp_attr->qp_state = IB_QPS_RTR;
2272 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2276 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2280 for (i = 0; i < target->queue_size; i++) {
2281 struct srp_iu *iu = ch->rx_ring[i];
2283 ret = srp_post_recv(ch, iu);
2288 qp_attr->qp_state = IB_QPS_RTS;
2289 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2293 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2295 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2299 ret = ib_send_cm_rtu(cm_id, NULL, 0);
/*
 * srp_cm_rej_handler() - translate a CM REJ into a channel status.
 *
 * The reject reason selects the action:
 * - port CM redirect: the path (dlid, pkey, dgid) and the remote CM QPN
 *   are updated from the class port info, and the status is
 *   SRP_DLID_REDIRECT when a dlid is given, else SRP_PORT_REDIRECT;
 * - port redirect: Topspin/Cisco targets get the dgid replaced from the
 *   additional reject info and status SRP_PORT_REDIRECT, other targets
 *   get -ECONNRESET;
 * - consumer defined: an SRP_LOGIN_REJ is decoded and its reason logged,
 *   any other opcode is logged; -ECONNRESET is assigned in this case;
 * - stale connection: status SRP_STALE_CONN;
 * - duplicate local comm id and all remaining reasons: logged, status
 *   -ECONNRESET.
 */
2308 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2309 struct ib_cm_event *event,
2310 struct srp_rdma_ch *ch)
2312 struct srp_target_port *target = ch->target;
2313 struct Scsi_Host *shost = target->scsi_host;
2314 struct ib_class_port_info *cpi;
2317 switch (event->param.rej_rcvd.reason) {
2318 case IB_CM_REJ_PORT_CM_REDIRECT:
2319 cpi = event->param.rej_rcvd.ari;
2320 ch->path.dlid = cpi->redirect_lid;
2321 ch->path.pkey = cpi->redirect_pkey;
2322 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2323 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2325 ch->status = ch->path.dlid ?
2326 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2329 case IB_CM_REJ_PORT_REDIRECT:
2330 if (srp_target_is_topspin(target)) {
2332 * Topspin/Cisco SRP gateways incorrectly send
2333 * reject reason code 25 when they mean 24
2336 memcpy(ch->path.dgid.raw,
2337 event->param.rej_rcvd.ari, 16);
2339 shost_printk(KERN_DEBUG, shost,
2340 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2341 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2342 be64_to_cpu(ch->path.dgid.global.interface_id));
2344 ch->status = SRP_PORT_REDIRECT;
2346 shost_printk(KERN_WARNING, shost,
2347 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2348 ch->status = -ECONNRESET;
2352 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2353 shost_printk(KERN_WARNING, shost,
2354 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2355 ch->status = -ECONNRESET;
2358 case IB_CM_REJ_CONSUMER_DEFINED:
2359 opcode = *(u8 *) event->private_data;
2360 if (opcode == SRP_LOGIN_REJ) {
2361 struct srp_login_rej *rej = event->private_data;
2362 u32 reason = be32_to_cpu(rej->reason);
2364 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2365 shost_printk(KERN_WARNING, shost,
2366 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2368 shost_printk(KERN_WARNING, shost, PFX
2369 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2371 target->orig_dgid.raw, reason);
2373 shost_printk(KERN_WARNING, shost,
2374 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2375 " opcode 0x%02x\n", opcode);
2376 ch->status = -ECONNRESET;
2379 case IB_CM_REJ_STALE_CONN:
2380 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2381 ch->status = SRP_STALE_CONN;
2385 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2386 event->param.rej_rcvd.reason);
2387 ch->status = -ECONNRESET;
/*
 * srp_cm_handler() - IB connection manager event callback of a channel.
 *
 * The channel is taken from cm_id->context. A failed REQ sets the channel
 * status to -ECONNRESET; REP and REJ events are passed on to
 * srp_cm_rep_handler() and srp_cm_rej_handler(); a DREQ marks the channel
 * disconnected, answers with a DREP and queues target->tl_err_work;
 * TIMEWAIT_EXIT is logged; MRA, DREQ error and DREP events share one case
 * label; anything else is logged as unhandled. The function contains a
 * complete(&ch->done) call that wakes a waiter on the channel.
 */
2391 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2393 struct srp_rdma_ch *ch = cm_id->context;
2394 struct srp_target_port *target = ch->target;
2397 switch (event->event) {
2398 case IB_CM_REQ_ERROR:
2399 shost_printk(KERN_DEBUG, target->scsi_host,
2400 PFX "Sending CM REQ failed\n");
2402 ch->status = -ECONNRESET;
2405 case IB_CM_REP_RECEIVED:
2407 srp_cm_rep_handler(cm_id, event->private_data, ch);
2410 case IB_CM_REJ_RECEIVED:
2411 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2414 srp_cm_rej_handler(cm_id, event, ch);
2417 case IB_CM_DREQ_RECEIVED:
2418 shost_printk(KERN_WARNING, target->scsi_host,
2419 PFX "DREQ received - connection closed\n");
2420 ch->connected = false;
2421 if (ib_send_cm_drep(cm_id, NULL, 0))
2422 shost_printk(KERN_ERR, target->scsi_host,
2423 PFX "Sending CM DREP failed\n");
2424 queue_work(system_long_wq, &target->tl_err_work);
2427 case IB_CM_TIMEWAIT_EXIT:
2428 shost_printk(KERN_ERR, target->scsi_host,
2429 PFX "connection closed\n");
2435 case IB_CM_MRA_RECEIVED:
2436 case IB_CM_DREQ_ERROR:
2437 case IB_CM_DREP_RECEIVED:
2441 shost_printk(KERN_WARNING, target->scsi_host,
2442 PFX "Unhandled CM event %d\n", event->event);
2447 complete(&ch->done);
/*
 * Queue depth callback: tests sdev->tagged_supported first and otherwise
 * forwards the requested depth to scsi_change_queue_depth(), whose result
 * is returned.
 */
2453 * srp_change_queue_depth - setting device queue depth
2454 * @sdev: scsi device struct
2455 * @qdepth: requested queue depth
2457 * Returns queue depth.
2460 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2462 if (!sdev->tagged_supported)
2464 return scsi_change_queue_depth(sdev, qdepth);
/*
 * srp_send_tsk_mgmt() - send an SRP task management request and wait for
 * the response.
 *
 * Nothing is sent when the channel is not connected or the QP is in error.
 * With the rport mutex held, a task management IU is taken under ch->lock,
 * zeroed and filled in: opcode SRP_TSK_MGMT, the LUN, a tag made of
 * req_tag with SRP_TAG_TSK_MGMT set, the function code and the tag of the
 * task to manage. If srp_post_send() fails the IU is returned with
 * srp_put_tx_iu(). After releasing the mutex the function waits up to
 * SRP_ABORT_TIMEOUT_MS for ch->tsk_mgmt_done.
 */
2467 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2470 struct srp_target_port *target = ch->target;
2471 struct srp_rport *rport = target->rport;
2472 struct ib_device *dev = target->srp_host->srp_dev->dev;
2474 struct srp_tsk_mgmt *tsk_mgmt;
2476 if (!ch->connected || target->qp_in_error)
2479 init_completion(&ch->tsk_mgmt_done);
2482 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2483 * invoked while a task management function is being sent.
2485 mutex_lock(&rport->mutex);
2486 spin_lock_irq(&ch->lock);
2487 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2488 spin_unlock_irq(&ch->lock);
2491 mutex_unlock(&rport->mutex);
2496 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2499 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2501 tsk_mgmt->opcode = SRP_TSK_MGMT;
2502 int_to_scsilun(lun, &tsk_mgmt->lun);
2503 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2504 tsk_mgmt->tsk_mgmt_func = func;
2505 tsk_mgmt->task_tag = req_tag;
2507 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2509 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2510 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2511 mutex_unlock(&rport->mutex);
2515 mutex_unlock(&rport->mutex);
2517 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2518 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
/*
 * srp_abort() - SCSI error handler abort callback.
 *
 * The request is taken from scmnd->host_scribble and the channel from the
 * hardware queue part of the block layer unique tag; an out-of-range
 * channel index triggers a one-time warning. The request has to be claimed
 * with srp_claim_req() before an SRP_TSK_ABORT_TASK request is sent for
 * the tag. The outcome depends on the result of srp_send_tsk_mgmt() and on
 * whether the rport is in state SRP_RPORT_LOST. Finally the request is
 * freed and the command is completed with DID_ABORT.
 */
2524 static int srp_abort(struct scsi_cmnd *scmnd)
2526 struct srp_target_port *target = host_to_target(scmnd->device->host);
2527 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2530 struct srp_rdma_ch *ch;
2533 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2537 tag = blk_mq_unique_tag(scmnd->request);
2538 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2539 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2541 ch = &target->ch[ch_idx];
2542 if (!srp_claim_req(ch, req, NULL, scmnd))
2544 shost_printk(KERN_ERR, target->scsi_host,
2545 "Sending SRP abort for tag %#x\n", tag);
2546 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2547 SRP_TSK_ABORT_TASK) == 0)
2549 else if (target->rport->state == SRP_RPORT_LOST)
2553 srp_free_req(ch, req, scmnd, 0);
2554 scmnd->result = DID_ABORT << 16;
2555 scmnd->scsi_done(scmnd);
2560 static int srp_reset_device(struct scsi_cmnd *scmnd)
2562 struct srp_target_port *target = host_to_target(scmnd->device->host);
2563 struct srp_rdma_ch *ch;
2566 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2568 ch = &target->ch[0];
2569 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2572 if (ch->tsk_mgmt_status)
2575 for (i = 0; i < target->ch_count; i++) {
2576 ch = &target->ch[i];
2577 for (i = 0; i < target->req_ring_size; ++i) {
2578 struct srp_request *req = &ch->req_ring[i];
2580 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
/*
 * srp_reset_host() - SCSI error handler host reset callback.
 *
 * Logs the call and reconnects the rport; returns SUCCESS when
 * srp_reconnect_rport() returns zero and FAILED otherwise.
 */
2587 static int srp_reset_host(struct scsi_cmnd *scmnd)
2589 struct srp_target_port *target = host_to_target(scmnd->device->host);
2591 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2593 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
/*
 * srp_slave_configure() - per-device configuration callback.
 *
 * For disk devices the request queue timeout is set to the larger of
 * 30 seconds and target->rq_tmo_jiffies.
 */
2596 static int srp_slave_configure(struct scsi_device *sdev)
2598 struct Scsi_Host *shost = sdev->host;
2599 struct srp_target_port *target = host_to_target(shost);
2600 struct request_queue *q = sdev->request_queue;
2601 unsigned long timeout;
2603 if (sdev->type == TYPE_DISK) {
2604 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2605 blk_queue_rq_timeout(q, timeout);
/*
 * sysfs show handler: prints target->id_ext, converted from big-endian, as
 * a 0x-prefixed 16-digit hexadecimal number followed by a newline.
 */
2611 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2614 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2616 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
/*
 * sysfs show handler: prints target->ioc_guid, converted from big-endian,
 * as a 0x-prefixed 16-digit hexadecimal number followed by a newline.
 */
2619 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2622 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2624 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
/*
 * sysfs show handler: prints target->service_id, converted from big-endian,
 * as a 0x-prefixed 16-digit hexadecimal number followed by a newline.
 */
2627 static ssize_t show_service_id(struct device *dev,
2628 struct device_attribute *attr, char *buf)
2630 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2632 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
/*
 * sysfs show handler: prints target->pkey, converted from big-endian, as a
 * 0x-prefixed 4-digit hexadecimal number followed by a newline.
 */
2635 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2638 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2640 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
/*
 * sysfs show handler: prints the raw bytes of target->sgid with the %pI6
 * address format followed by a newline.
 */
2643 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2646 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2648 return sprintf(buf, "%pI6\n", target->sgid.raw);
/*
 * sysfs show handler: prints the destination GID of the path of the first
 * channel (target->ch[0]) with the %pI6 address format and a newline.
 */
2651 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2654 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2655 struct srp_rdma_ch *ch = &target->ch[0];
2657 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
/*
 * sysfs show handler: prints the raw bytes of target->orig_dgid with the
 * %pI6 address format followed by a newline.
 */
2660 static ssize_t show_orig_dgid(struct device *dev,
2661 struct device_attribute *attr, char *buf)
2663 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2665 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
/*
 * sysfs show handler: prints the smallest req_lim value found across all
 * channels of the target, starting from INT_MAX, as a decimal number
 * followed by a newline.
 */
2668 static ssize_t show_req_lim(struct device *dev,
2669 struct device_attribute *attr, char *buf)
2671 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2672 struct srp_rdma_ch *ch;
2673 int i, req_lim = INT_MAX;
2675 for (i = 0; i < target->ch_count; i++) {
2676 ch = &target->ch[i];
2677 req_lim = min(req_lim, ch->req_lim);
2679 return sprintf(buf, "%d\n", req_lim);
2682 static ssize_t show_zero_req_lim(struct device *dev,
2683 struct device_attribute *attr, char *buf)
2685 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2687 return sprintf(buf, "%d\n", target->zero_req_lim);
2690 static ssize_t show_local_ib_port(struct device *dev,
2691 struct device_attribute *attr, char *buf)
2693 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2695 return sprintf(buf, "%d\n", target->srp_host->port);
2698 static ssize_t show_local_ib_device(struct device *dev,
2699 struct device_attribute *attr, char *buf)
2701 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2703 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2706 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2709 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2711 return sprintf(buf, "%d\n", target->ch_count);
2714 static ssize_t show_comp_vector(struct device *dev,
2715 struct device_attribute *attr, char *buf)
2717 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2719 return sprintf(buf, "%d\n", target->comp_vector);
2722 static ssize_t show_tl_retry_count(struct device *dev,
2723 struct device_attribute *attr, char *buf)
2725 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2727 return sprintf(buf, "%d\n", target->tl_retry_count);
2730 static ssize_t show_cmd_sg_entries(struct device *dev,
2731 struct device_attribute *attr, char *buf)
2733 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2735 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2738 static ssize_t show_allow_ext_sg(struct device *dev,
2739 struct device_attribute *attr, char *buf)
2741 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2743 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2746 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2747 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2748 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2749 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2750 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2751 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2752 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2753 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2754 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2755 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2756 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2757 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2758 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2759 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2760 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2761 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
/*
 * Table of sysfs attributes attached to every SRP SCSI host; srp_template
 * references it through its .shost_attrs field.
 *
 * NOTE(review): this listing omits short lines, so some entries and the end
 * of the initializer are not visible here - compare against the DEVICE_ATTR()
 * definitions before relying on this list being complete.
 */
2763 static struct device_attribute *srp_host_attrs[] = {
2766 &dev_attr_service_id,
2770 &dev_attr_orig_dgid,
2772 &dev_attr_zero_req_lim,
2773 &dev_attr_local_ib_port,
2774 &dev_attr_local_ib_device,
2776 &dev_attr_comp_vector,
2777 &dev_attr_tl_retry_count,
2778 &dev_attr_cmd_sg_entries,
2779 &dev_attr_allow_ext_sg,
/*
 * SCSI host template for SRP target ports. srp_create_target() passes it to
 * scsi_host_alloc() for every new target.
 *
 * NOTE(review): this listing omits short lines, so the template may contain
 * fields that are not visible here.
 */
2783 static struct scsi_host_template srp_template = {
2784 .module = THIS_MODULE,
2785 .name = "InfiniBand SRP initiator",
2786 .proc_name = DRV_NAME,
/* Command submission and per-device configuration entry points. */
2787 .slave_configure = srp_slave_configure,
2788 .info = srp_target_info,
2789 .queuecommand = srp_queuecommand,
2790 .change_queue_depth = srp_change_queue_depth,
/* SCSI error-handler escalation: abort, then device reset, then host reset. */
2791 .eh_abort_handler = srp_abort,
2792 .eh_device_reset_handler = srp_reset_device,
2793 .eh_host_reset_handler = srp_reset_host,
2794 .skip_settle_delay = true,
/*
 * Defaults; srp_create_target() overwrites sg_tablesize and
 * srp_parse_options() may overwrite can_queue and cmd_per_lun per host.
 */
2795 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2796 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2798 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2799 .use_clustering = ENABLE_CLUSTERING,
2800 .shost_attrs = srp_host_attrs,
2802 .track_queue_depth = 1,
/*
 * srp_sdev_count() - iterate over the SCSI devices attached to @host.
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing. Presumably this returns the number of devices on @host
 * (srp_add_target() logs the result as the number of detected LUNs) -
 * confirm against the full source.
 */
2805 static int srp_sdev_count(struct Scsi_Host *host)
2807 struct scsi_device *sdev;
2810 shost_for_each_device(sdev, host)
/*
 * Return values:
 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
 *    removal has been scheduled.
 * 0 and target->state != SRP_TARGET_REMOVED upon success.
 */
/*
 * srp_add_target() - register a connected target port and scan it for LUNs.
 * @host:   SRP host (HCA port) the target is attached to.
 * @target: SRP target port to register.
 *
 * Adds the target's SCSI host, creates the SRP remote port object, links the
 * target into @host's target list and scans the target for devices.
 *
 * NOTE(review): this listing omits short lines (braces and several
 * single-statement lines), so some return statements are not visible here.
 */
2823 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2825 struct srp_rport_identifiers ids;
2826 struct srp_rport *rport;
2828 target->state = SRP_TARGET_SCANNING;
2829 sprintf(target->target_name, "SRP.T10:%016llX",
2830 be64_to_cpu(target->id_ext));
2832 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
/* The 16-byte rport port_id is id_ext followed by ioc_guid (8 bytes each). */
2835 memcpy(ids.port_id, &target->id_ext, 8);
2836 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2837 ids.roles = SRP_RPORT_ROLE_TARGET;
2838 rport = srp_rport_add(target->scsi_host, &ids);
/* Undo scsi_add_host() if the remote port could not be created. */
2839 if (IS_ERR(rport)) {
2840 scsi_remove_host(target->scsi_host);
2841 return PTR_ERR(rport);
2844 rport->lld_data = target;
2845 target->rport = rport;
2847 spin_lock(&host->target_lock);
2848 list_add_tail(&target->list, &host->target_list);
2849 spin_unlock(&host->target_lock);
2851 scsi_scan_target(&target->scsi_host->shost_gendev,
2852 0, target->scsi_id, SCAN_WILD_CARD, 0);
/*
 * The scan is treated as failed when srp_connected_ch() reports fewer
 * channels than ch_count or a QP error was flagged; removal of the target is
 * then queued instead of being done inline.
 */
2854 if (srp_connected_ch(target) < target->ch_count ||
2855 target->qp_in_error) {
2856 shost_printk(KERN_INFO, target->scsi_host,
2857 PFX "SCSI scan failed - removing SCSI host\n");
2858 srp_queue_remove_work(target);
2862 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2863 dev_name(&target->scsi_host->shost_gendev),
2864 srp_sdev_count(target->scsi_host));
/* Only move to LIVE if the state is still SCANNING at this point. */
2866 spin_lock_irq(&target->lock);
2867 if (target->state == SRP_TARGET_SCANNING)
2868 target->state = SRP_TARGET_LIVE;
2869 spin_unlock_irq(&target->lock);
2875 static void srp_release_dev(struct device *dev)
2877 struct srp_host *host =
2878 container_of(dev, struct srp_host, dev);
2880 complete(&host->released);
/*
 * Device class "infiniband_srp". srp_add_port() assigns it to each per-port
 * srp_host device, and srp_init_module() registers it. srp_release_dev() runs
 * as the release callback of devices in this class.
 */
2883 static struct class srp_class = {
2884 .name = "infiniband_srp",
2885 .dev_release = srp_release_dev
/**
 * srp_conn_unique() - check whether the connection to a target is unique
 * @host:   SRP host.
 * @target: SRP target port.
 */
/*
 * Walks @host's target list under target_lock and compares id_ext, ioc_guid
 * and initiator_ext of each listed target port with those of @target.
 *
 * NOTE(review): this listing omits short lines, so the first line of the
 * comparison, the result assignments and the return statement are not
 * visible. Presumably returns false when another listed target matches all
 * three identifiers - confirm against the full source.
 */
2893 static bool srp_conn_unique(struct srp_host *host,
2894 struct srp_target_port *target)
2896 struct srp_target_port *t;
/* A target already marked REMOVED is handled before the list is searched. */
2899 if (target->state == SRP_TARGET_REMOVED)
2904 spin_lock(&host->target_lock);
2905 list_for_each_entry(t, &host->target_list, list) {
2907 target->id_ext == t->id_ext &&
2908 target->ioc_guid == t->ioc_guid &&
2909 target->initiator_ext == t->initiator_ext) {
2914 spin_unlock(&host->target_lock);
/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 */
/*
 * Option tokens for the add_target option string. Each option is a distinct
 * bit so that srp_parse_options() can compare the set of options it has seen
 * against a mask.
 *
 * NOTE(review): the enum opener and some lines of the SRP_OPT_ALL expression
 * are not visible in this listing.
 */
2930 SRP_OPT_ID_EXT = 1 << 0,
2931 SRP_OPT_IOC_GUID = 1 << 1,
2932 SRP_OPT_DGID = 1 << 2,
2933 SRP_OPT_PKEY = 1 << 3,
2934 SRP_OPT_SERVICE_ID = 1 << 4,
2935 SRP_OPT_MAX_SECT = 1 << 5,
2936 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2937 SRP_OPT_IO_CLASS = 1 << 7,
2938 SRP_OPT_INITIATOR_EXT = 1 << 8,
2939 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2940 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2941 SRP_OPT_SG_TABLESIZE = 1 << 11,
2942 SRP_OPT_COMP_VECTOR = 1 << 12,
2943 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2944 SRP_OPT_QUEUE_SIZE = 1 << 14,
/*
 * Mask of the options srp_parse_options() checks for after parsing; a
 * missing one is reported as a missing parameter.
 */
2945 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2949 SRP_OPT_SERVICE_ID),
/*
 * match_token() table for the add_target option string: maps each
 * "name=<value>" pattern to its SRP_OPT_* token. The conversion in each
 * pattern (%s, %x, %d, %u) determines which text the pattern accepts as the
 * value. The final entry with a NULL pattern terminates the table and yields
 * SRP_OPT_ERR.
 */
2952 static const match_table_t srp_opt_tokens = {
2953 { SRP_OPT_ID_EXT, "id_ext=%s" },
2954 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2955 { SRP_OPT_DGID, "dgid=%s" },
2956 { SRP_OPT_PKEY, "pkey=%x" },
2957 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2958 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2959 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2960 { SRP_OPT_IO_CLASS, "io_class=%x" },
2961 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2962 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2963 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2964 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2965 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2966 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2967 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2968 { SRP_OPT_ERR, NULL }
/*
 * srp_parse_options() - parse an add_target option string into @target.
 * @buf:    option string; it is split on ',' and '\n' and every piece is
 *          matched against srp_opt_tokens.
 * @target: SRP target port whose fields, and whose scsi_host limits, are
 *          filled in from the recognised options.
 *
 * Parsing is done on a kstrdup() copy of @buf. After the loop the collected
 * option mask is checked against SRP_OPT_ALL and every missing mandatory
 * parameter is reported with pr_warn().
 *
 * NOTE(review): this listing omits short lines (braces and most
 * single-statement lines), so the error codes, the cleanup of temporary
 * strings and the final return value cannot be confirmed from here.
 */
2971 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2973 char *options, *sep_opt;
2976 substring_t args[MAX_OPT_ARGS];
2982 options = kstrdup(buf, GFP_KERNEL);
2987 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2991 token = match_token(p, srp_opt_tokens, args);
/* 64-bit identifiers are parsed as hex and stored big-endian. */
2995 case SRP_OPT_ID_EXT:
2996 p = match_strdup(args);
3001 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3005 case SRP_OPT_IOC_GUID:
3006 p = match_strdup(args);
3011 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3016 p = match_strdup(args);
/* The destination GID must be exactly 32 hex digits, i.e. 16 bytes. */
3021 if (strlen(p) != 32) {
3022 pr_warn("bad dest GID parameter '%s'\n", p);
/*
 * Convert the GID one byte at a time: copy a short slice starting at digit
 * 2*i into dgid and scan it as hex into orig_dgid.raw[i].
 * NOTE(review): assumes dgid holds two digits plus the terminator - its
 * declaration is not visible in this listing.
 */
3027 for (i = 0; i < 16; ++i) {
3028 strlcpy(dgid, p + i * 2, sizeof(dgid));
3029 if (sscanf(dgid, "%hhx",
3030 &target->orig_dgid.raw[i]) < 1) {
3040 if (match_hex(args, &token)) {
3041 pr_warn("bad P_Key parameter '%s'\n", p);
3044 target->pkey = cpu_to_be16(token);
3047 case SRP_OPT_SERVICE_ID:
3048 p = match_strdup(args);
3053 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3057 case SRP_OPT_MAX_SECT:
3058 if (match_int(args, &token)) {
3059 pr_warn("bad max sect parameter '%s'\n", p);
3062 target->scsi_host->max_sectors = token;
/*
 * queue_size becomes the host's can_queue; the target's own queue_size adds
 * SRP_RSP_SQ_SIZE and SRP_TSK_MGMT_SQ_SIZE on top. cmd_per_lun follows
 * queue_size unless max_cmd_per_lun is already set in opt_mask.
 */
3065 case SRP_OPT_QUEUE_SIZE:
3066 if (match_int(args, &token) || token < 1) {
3067 pr_warn("bad queue_size parameter '%s'\n", p);
3070 target->scsi_host->can_queue = token;
3071 target->queue_size = token + SRP_RSP_SQ_SIZE +
3072 SRP_TSK_MGMT_SQ_SIZE;
3073 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3074 target->scsi_host->cmd_per_lun = token;
3077 case SRP_OPT_MAX_CMD_PER_LUN:
3078 if (match_int(args, &token) || token < 1) {
3079 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3083 target->scsi_host->cmd_per_lun = token;
/* Only the two IB I/O classes named below are accepted. */
3086 case SRP_OPT_IO_CLASS:
3087 if (match_hex(args, &token)) {
3088 pr_warn("bad IO class parameter '%s'\n", p);
3091 if (token != SRP_REV10_IB_IO_CLASS &&
3092 token != SRP_REV16A_IB_IO_CLASS) {
3093 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3094 token, SRP_REV10_IB_IO_CLASS,
3095 SRP_REV16A_IB_IO_CLASS);
3098 target->io_class = token;
3101 case SRP_OPT_INITIATOR_EXT:
3102 p = match_strdup(args);
3107 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
/* cmd_sg_entries is limited to the range 1..255. */
3111 case SRP_OPT_CMD_SG_ENTRIES:
3112 if (match_int(args, &token) || token < 1 || token > 255) {
3113 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3117 target->cmd_sg_cnt = token;
/* Any non-zero value enables allow_ext_sg. */
3120 case SRP_OPT_ALLOW_EXT_SG:
3121 if (match_int(args, &token)) {
3122 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3125 target->allow_ext_sg = !!token;
3128 case SRP_OPT_SG_TABLESIZE:
3129 if (match_int(args, &token) || token < 1 ||
3130 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3131 pr_warn("bad max sg_tablesize parameter '%s'\n",
3135 target->sg_tablesize = token;
3138 case SRP_OPT_COMP_VECTOR:
3139 if (match_int(args, &token) || token < 0) {
3140 pr_warn("bad comp_vector parameter '%s'\n", p);
3143 target->comp_vector = token;
3146 case SRP_OPT_TL_RETRY_COUNT:
3147 if (match_int(args, &token) || token < 2 || token > 7) {
3148 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3152 target->tl_retry_count = token;
3156 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
/* All options in SRP_OPT_ALL must have been seen. */
3162 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
/* Name every mandatory option that is absent from opt_mask. */
3165 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3166 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3167 !(srp_opt_tokens[i].token & opt_mask))
3168 pr_warn("target creation request is missing parameter '%s'\n",
3169 srp_opt_tokens[i].pattern);
/* Warn when an explicitly given cmd_per_lun exceeds can_queue. */
3171 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3172 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3173 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3174 target->scsi_host->cmd_per_lun,
3175 target->scsi_host->can_queue);
/*
 * srp_create_target() - store callback of the "add_target" sysfs attribute.
 * @dev:   device embedded in the struct srp_host the target is created on.
 * @attr:  not used by the visible code.
 * @buf:   option string, handed to srp_parse_options().
 * @count: length of @buf. NOTE(review): presumably returned on success - the
 *         assignment is not visible in this listing.
 *
 * Allocates a SCSI host whose private data is the srp_target_port, applies
 * module-parameter defaults, parses the options, sets up and connects the
 * RDMA channels and finally registers the target with srp_add_target().
 *
 * NOTE(review): this listing omits short lines (braces, most labels and
 * single-statement lines such as the error checks after each call), so the
 * exact error paths cannot be confirmed from here.
 */
3182 static ssize_t srp_create_target(struct device *dev,
3183 struct device_attribute *attr,
3184 const char *buf, size_t count)
3186 struct srp_host *host =
3187 container_of(dev, struct srp_host, dev);
3188 struct Scsi_Host *target_host;
3189 struct srp_target_port *target;
3190 struct srp_rdma_ch *ch;
3191 struct srp_device *srp_dev = host->srp_dev;
3192 struct ib_device *ibdev = srp_dev->dev;
3193 int ret, node_idx, node, cpu, i;
3194 bool multich = false;
3196 target_host = scsi_host_alloc(&srp_template,
3197 sizeof (struct srp_target_port));
3201 target_host->transportt = ib_srp_transport_template;
3202 target_host->max_channel = 0;
3203 target_host->max_id = 1;
3204 target_host->max_lun = -1LL;
/* The largest CDB is bounded by the size of the cdb field of struct srp_cmd. */
3205 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3207 target = host_to_target(target_host);
/* Defaults; srp_parse_options() may override several of these. */
3209 target->io_class = SRP_REV16A_IB_IO_CLASS;
3210 target->scsi_host = target_host;
3211 target->srp_host = host;
3212 target->lkey = host->srp_dev->pd->local_dma_lkey;
3213 target->global_mr = host->srp_dev->global_mr;
3214 target->cmd_sg_cnt = cmd_sg_entries;
3215 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3216 target->allow_ext_sg = allow_ext_sg;
3217 target->tl_retry_count = 7;
3218 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3221 * Avoid that the SCSI host can be removed by srp_remove_target()
3222 * before this function returns.
3224 scsi_host_get(target->scsi_host);
/* Target creation on one srp_host is serialised by add_target_mutex. */
3226 mutex_lock(&host->add_target_mutex);
3228 ret = srp_parse_options(buf, target);
3232 ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3236 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3238 if (!srp_conn_unique(target->srp_host, target)) {
3239 shost_printk(KERN_INFO, target->scsi_host,
3240 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3241 be64_to_cpu(target->id_ext),
3242 be64_to_cpu(target->ioc_guid),
3243 be64_to_cpu(target->initiator_ext));
/*
 * Without FMR, FR or external indirect descriptors, sg_tablesize is capped
 * at cmd_sg_cnt.
 */
3248 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3249 target->cmd_sg_cnt < target->sg_tablesize) {
3250 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3251 target->sg_tablesize = target->cmd_sg_cnt;
3254 target_host->sg_tablesize = target->sg_tablesize;
/* One srp_direct_buf descriptor per scatter/gather table entry. */
3255 target->indirect_size = target->sg_tablesize *
3256 sizeof (struct srp_direct_buf);
/* Largest IU: command, indirect buffer header and cmd_sg_cnt descriptors. */
3257 target->max_iu_len = sizeof (struct srp_cmd) +
3258 sizeof (struct srp_indirect_buf) +
3259 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3261 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3262 INIT_WORK(&target->remove_work, srp_remove_work);
3263 spin_lock_init(&target->lock);
/* The source GID is GID table entry 0 of the local port. */
3264 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
/*
 * Use at least one channel per online NUMA node.
 * NOTE(review): one line of this expression is not visible in this listing;
 * the visible part also involves 4 * nodes, the number of completion vectors
 * and the number of online CPUs.
 */
3269 target->ch_count = max_t(unsigned, num_online_nodes(),
3271 min(4 * num_online_nodes(),
3272 ibdev->num_comp_vectors),
3273 num_online_cpus()));
3274 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
/*
 * Each online node gets an equal slice [ch_start, ch_end) of the channels and
 * an equal slice of the completion vectors, the latter shifted by
 * target->comp_vector and wrapped at num_comp_vectors.
 */
3280 for_each_online_node(node) {
3281 const int ch_start = (node_idx * target->ch_count /
3282 num_online_nodes());
3283 const int ch_end = ((node_idx + 1) * target->ch_count /
3284 num_online_nodes());
3285 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3286 num_online_nodes() + target->comp_vector)
3287 % ibdev->num_comp_vectors;
3288 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3289 num_online_nodes() + target->comp_vector)
3290 % ibdev->num_comp_vectors;
/* Within a node, set up one channel per CPU until the node's slice is used. */
3293 for_each_online_cpu(cpu) {
3294 if (cpu_to_node(cpu) != node)
3296 if (ch_start + cpu_idx >= ch_end)
3298 ch = &target->ch[ch_start + cpu_idx];
3299 ch->target = target;
/* Spread the node's channels over its completion-vector slice. */
3300 ch->comp_vector = cv_start == cv_end ? cv_start :
3301 cv_start + cpu_idx % (cv_end - cv_start);
3302 spin_lock_init(&ch->lock);
3303 INIT_LIST_HEAD(&ch->free_tx);
3304 ret = srp_new_cm_id(ch);
3306 goto err_disconnect;
3308 ret = srp_create_ch_ib(ch);
3310 goto err_disconnect;
3312 ret = srp_alloc_req_data(ch);
3314 goto err_disconnect;
3316 ret = srp_connect_ch(ch, multich);
3318 shost_printk(KERN_ERR, target->scsi_host,
3319 PFX "Connection %d/%d failed\n",
/*
 * A failure on the very first channel is fatal. For a later channel, free
 * that channel's resources and shrink ch_count to the number of channels
 * that precede it in the array.
 */
3322 if (node_idx == 0 && cpu_idx == 0) {
3323 goto err_disconnect;
3325 srp_free_ch_ib(target, ch);
3326 srp_free_req_data(target, ch);
3327 target->ch_count = ch - target->ch;
/* One hardware queue per channel. */
3339 target->scsi_host->nr_hw_queues = target->ch_count;
3341 ret = srp_add_target(host, target);
3343 goto err_disconnect;
3345 if (target->state != SRP_TARGET_REMOVED) {
3346 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3347 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3348 be64_to_cpu(target->id_ext),
3349 be64_to_cpu(target->ioc_guid),
3350 be16_to_cpu(target->pkey),
3351 be64_to_cpu(target->service_id),
3352 target->sgid.raw, target->orig_dgid.raw);
3358 mutex_unlock(&host->add_target_mutex);
/*
 * NOTE(review): two host references are dropped below; the condition guarding
 * the second one is not visible in this listing. Presumably the first pairs
 * with the scsi_host_get() above and the second releases the allocation on
 * failure - confirm against the full source.
 */
3360 scsi_host_put(target->scsi_host);
3362 scsi_host_put(target->scsi_host);
/* Error path: disconnect, then free the resources of every channel. */
3367 srp_disconnect_target(target);
3369 for (i = 0; i < target->ch_count; i++) {
3370 ch = &target->ch[i];
3371 srp_free_ch_ib(target, ch);
3372 srp_free_req_data(target, ch);
/* Write-only (owner) attribute; writing to it calls srp_create_target(). */
3379 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3381 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3384 struct srp_host *host = container_of(dev, struct srp_host, dev);
3386 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3389 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3391 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3394 struct srp_host *host = container_of(dev, struct srp_host, dev);
3396 return sprintf(buf, "%d\n", host->port);
3399 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
/*
 * srp_add_port() - create the srp_host object for one port of an HCA.
 * @device: SRP device the port belongs to.
 * @port:   port number; it is used in the sysfs device name.
 *
 * Allocates and initialises a struct srp_host, registers its embedded device
 * in srp_class under the name "srp-<IB device name>-<port>" and creates the
 * add_target, ibdev and port attribute files on it.
 *
 * NOTE(review): this listing omits short lines, so the error branches, the
 * labels and the return statements are not visible here.
 */
3401 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3403 struct srp_host *host;
3405 host = kzalloc(sizeof *host, GFP_KERNEL);
3409 INIT_LIST_HEAD(&host->target_list);
3410 spin_lock_init(&host->target_lock);
3411 init_completion(&host->released);
3412 mutex_init(&host->add_target_mutex);
3413 host->srp_dev = device;
3416 host->dev.class = &srp_class;
3417 host->dev.parent = device->dev->dma_device;
3418 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3420 if (device_register(&host->dev))
3422 if (device_create_file(&host->dev, &dev_attr_add_target))
3424 if (device_create_file(&host->dev, &dev_attr_ibdev))
3426 if (device_create_file(&host->dev, &dev_attr_port))
/* Unwinding: the device is unregistered again. */
3432 device_unregister(&host->dev);
/*
 * srp_add_one() - set up SRP state for an IB device.
 * @device: IB device to set up.
 *
 * Queries the device attributes, decides between FMR and fast registration,
 * derives the memory-registration page size and limits, allocates a
 * protection domain (and, depending on register_always and device support, a
 * global DMA MR), creates one srp_host per port and stores the resulting
 * srp_device as client data for srp_client.
 *
 * NOTE(review): this listing omits short lines, so the error branches, the
 * labels and the cleanup of temporary allocations are not fully visible.
 */
3440 static void srp_add_one(struct ib_device *device)
3442 struct srp_device *srp_dev;
3443 struct ib_device_attr *dev_attr;
3444 struct srp_host *host;
3445 int mr_page_shift, p;
3446 u64 max_pages_per_mr;
3448 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3452 if (ib_query_device(device, dev_attr)) {
3453 pr_warn("Query device failed for %s\n", device->name);
3457 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
/* FMR needs all four FMR verbs; FR needs the memory-management extensions. */
3461 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3462 device->map_phys_fmr && device->unmap_fmr);
3463 srp_dev->has_fr = (dev_attr->device_cap_flags &
3464 IB_DEVICE_MEM_MGT_EXTENSIONS);
3465 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3466 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
/* Fast registration is used when it is the only option or prefer_fr is set. */
3468 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3469 (!srp_dev->has_fmr || prefer_fr));
3470 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3473 * Use the smallest page size supported by the HCA, down to a
3474 * minimum of 4096 bytes. We're unlikely to build large sglists
3475 * out of smaller entries.
3477 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
3478 srp_dev->mr_page_size = 1 << mr_page_shift;
3479 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
/*
 * Pages per MR = max_mr_size / mr_page_size, capped at SRP_MAX_PAGES_PER_MR
 * and, with fast registration, at the device's fast-reg page list length.
 */
3480 max_pages_per_mr = dev_attr->max_mr_size;
3481 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3482 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3484 if (srp_dev->use_fast_reg) {
3485 srp_dev->max_pages_per_mr =
3486 min_t(u32, srp_dev->max_pages_per_mr,
3487 dev_attr->max_fast_reg_page_list_len);
3489 srp_dev->mr_max_size = srp_dev->mr_page_size *
3490 srp_dev->max_pages_per_mr;
3491 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3492 device->name, mr_page_shift, dev_attr->max_mr_size,
3493 dev_attr->max_fast_reg_page_list_len,
3494 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3496 INIT_LIST_HEAD(&srp_dev->dev_list);
3498 srp_dev->dev = device;
3499 srp_dev->pd = ib_alloc_pd(device);
3500 if (IS_ERR(srp_dev->pd))
/*
 * A global DMA MR with remote read/write access is requested when
 * register_always is off or the device supports neither FMR nor FR. The
 * later assignment sets global_mr to NULL for the other case.
 */
3503 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3504 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3505 IB_ACCESS_LOCAL_WRITE |
3506 IB_ACCESS_REMOTE_READ |
3507 IB_ACCESS_REMOTE_WRITE);
3508 if (IS_ERR(srp_dev->global_mr))
3511 srp_dev->global_mr = NULL;
/* Create a host object for each port of the device. */
3514 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3515 host = srp_add_port(srp_dev, p);
3517 list_add_tail(&host->list, &srp_dev->dev_list);
3520 ib_set_client_data(device, &srp_client, srp_dev);
3525 ib_dealloc_pd(srp_dev->pd);
/*
 * srp_remove_one() - tear down the SRP state of an IB device.
 * @device:      IB device being removed (not referenced by the visible code).
 * @client_data: the struct srp_device for this IB device.
 *
 * For every host on the device: unregisters the sysfs device, waits for its
 * release, queues removal of all target ports and flushes the work queues.
 * Afterwards the global MR (if any) and the protection domain are released.
 *
 * NOTE(review): this listing omits short lines, so early returns and the
 * freeing of the host and device structures are not visible here.
 */
3534 static void srp_remove_one(struct ib_device *device, void *client_data)
3536 struct srp_device *srp_dev;
3537 struct srp_host *host, *tmp_host;
3538 struct srp_target_port *target;
3540 srp_dev = client_data;
3544 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3545 device_unregister(&host->dev);
3547 * Wait for the sysfs entry to go away, so that no new
3548 * target ports can be created.
3550 wait_for_completion(&host->released);
3553 * Remove all target ports.
3555 spin_lock(&host->target_lock);
3556 list_for_each_entry(target, &host->target_list, list)
3557 srp_queue_remove_work(target);
3558 spin_unlock(&host->target_lock);
3561 * Wait for tl_err and target port removal tasks.
3563 flush_workqueue(system_long_wq);
3564 flush_workqueue(srp_remove_wq);
/* global_mr is NULL when srp_add_one() did not allocate one. */
3569 if (srp_dev->global_mr)
3570 ib_dereg_mr(srp_dev->global_mr);
3571 ib_dealloc_pd(srp_dev->pd);
/*
 * Callbacks and tunables handed to the SRP transport class;
 * srp_init_module() passes this template to srp_attach_transport(). The three
 * timeout fields point at variables defined elsewhere in this file.
 */
3576 static struct srp_function_template ib_srp_transport_functions = {
3577 .has_rport_state = true,
3578 .reset_timer_if_blocked = true,
3579 .reconnect_delay = &srp_reconnect_delay,
3580 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3581 .dev_loss_tmo = &srp_dev_loss_tmo,
3582 .reconnect = srp_rport_reconnect,
3583 .rport_delete = srp_rport_delete,
3584 .terminate_rport_io = srp_terminate_io,
/*
 * srp_init_module() - module entry point.
 *
 * Normalises the scatter/gather module parameters, then creates the removal
 * work queue, attaches the SRP transport template, registers the
 * infiniband_srp class, the SA client and the IB client. The trailing calls
 * undo these steps in reverse order.
 *
 * NOTE(review): this listing omits short lines, so the labels, error checks
 * and return statements are not visible here.
 */
3587 static int __init srp_init_module(void)
/* A work-completion wr_id must be able to hold a pointer. */
3591 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
/* Deprecated srp_sg_tablesize only applies when cmd_sg_entries is unset. */
3593 if (srp_sg_tablesize) {
3594 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3595 if (!cmd_sg_entries)
3596 cmd_sg_entries = srp_sg_tablesize;
3599 if (!cmd_sg_entries)
3600 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3602 if (cmd_sg_entries > 255) {
3603 pr_warn("Clamping cmd_sg_entries to 255\n");
3604 cmd_sg_entries = 255;
/* indirect_sg_entries defaults to, and is never below, cmd_sg_entries. */
3607 if (!indirect_sg_entries)
3608 indirect_sg_entries = cmd_sg_entries;
3609 else if (indirect_sg_entries < cmd_sg_entries) {
3610 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3612 indirect_sg_entries = cmd_sg_entries;
3615 srp_remove_wq = create_workqueue("srp_remove");
3616 if (!srp_remove_wq) {
3622 ib_srp_transport_template =
3623 srp_attach_transport(&ib_srp_transport_functions);
3624 if (!ib_srp_transport_template)
3627 ret = class_register(&srp_class);
3629 pr_err("couldn't register class infiniband_srp\n");
3633 ib_sa_register_client(&srp_sa_client);
3635 ret = ib_register_client(&srp_client);
3637 pr_err("couldn't register IB client\n");
/* Unwinding, in reverse order of the setup above. */
3645 ib_sa_unregister_client(&srp_sa_client);
3646 class_unregister(&srp_class);
3649 srp_release_transport(ib_srp_transport_template);
3652 destroy_workqueue(srp_remove_wq);
/*
 * srp_cleanup_module() - module exit point.
 *
 * Releases what srp_init_module() set up, in reverse order: the IB client,
 * the SA client, the infiniband_srp class, the SRP transport template and
 * the removal work queue.
 */
3656 static void __exit srp_cleanup_module(void)
3658 ib_unregister_client(&srp_client);
3659 ib_sa_unregister_client(&srp_sa_client);
3660 class_unregister(&srp_class);
3661 srp_release_transport(ib_srp_transport_template);
3662 destroy_workqueue(srp_remove_wq);
/* Register the module entry and exit points. */
3665 module_init(srp_init_module);
3666 module_exit(srp_cleanup_module);