Merge branches 'cxgb4-2', 'i40iw-2', 'ipoib', 'misc-4.7' and 'mlx5-fcs' into k.o...
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7f08cb1..7ecb197 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
 static bool prefer_fr = true;
 static bool register_always = true;
+static bool never_register;
 static int topspin_workarounds = 1;
 
 module_param(srp_sg_tablesize, uint, 0444);
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
 MODULE_PARM_DESC(register_always,
                 "Use memory registration even for contiguous memory regions");
 
+module_param(never_register, bool, 0444);
+MODULE_PARM_DESC(never_register, "Never register memory");
+
 static const struct kernel_param_ops srp_tmo_ops;
 
 static int srp_reconnect_delay = 10;
@@ -447,16 +451,16 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
 
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
- * @ch: SRP RDMA channel.
+ * @qp: RDMA queue pair.
  *
  * Drain the qp before destroying it. This prevents the receive completion
  * handler from accessing the queue pair while it is being destroyed.
  */
-static void srp_destroy_qp(struct srp_rdma_ch *ch)
+static void srp_destroy_qp(struct ib_qp *qp)
 {
-       ib_drain_rq(ch->qp);
-       ib_destroy_qp(ch->qp);
+       ib_drain_rq(qp);
+       ib_destroy_qp(qp);
 }
 
 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
@@ -468,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        struct ib_qp *qp;
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
-       const int m = dev->use_fast_reg ? 3 : 1;
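+       /*
+        * Each SCSI command consumes at most one send work request for the
+        * SRP_CMD information unit plus, when fast registration is used, two
+        * work requests per memory region: an IB_WR_REG_MR to register it and
+        * an IB_WR_LOCAL_INV to invalidate it when the command completes.
+        */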
+       const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
        int ret;
 
        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -529,7 +533,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        }
 
        if (ch->qp)
-               srp_destroy_qp(ch);
+               srp_destroy_qp(ch->qp);
        if (ch->recv_cq)
                ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
@@ -553,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        return 0;
 
 err_qp:
-       srp_destroy_qp(ch);
+       srp_destroy_qp(qp);
 
 err_send_cq:
        ib_free_cq(send_cq);
@@ -596,7 +600,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
 
-       srp_destroy_qp(ch);
+       srp_destroy_qp(ch->qp);
        ib_free_cq(ch->send_cq);
        ib_free_cq(ch->recv_cq);
 
@@ -849,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
-               mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+               mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
                                  GFP_KERNEL);
                if (!mr_list)
                        goto out;
@@ -1298,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
        srp_handle_qp_err(cq, wc, "FAST REG");
 }
 
+/*
+ * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
+ * at which to start in the first element. If sg_offset_p != NULL then
+ * *sg_offset_p is updated to the offset in state->sg[retval] of the first
+ * byte that has not yet been mapped.
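+ *
+ * Returns the number of elements of state->sg that have been mapped, or a
+ * negative error code upon failure.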
+ */
 static int srp_map_finish_fr(struct srp_map_state *state,
                             struct srp_request *req,
-                            struct srp_rdma_ch *ch, int sg_nents)
+                            struct srp_rdma_ch *ch, int sg_nents,
+                            unsigned int *sg_offset_p)
 {
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,9 +1327,13 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        WARN_ON_ONCE(!dev->use_fast_reg);
 
        if (sg_nents == 1 && target->global_mr) {
-               srp_map_desc(state, sg_dma_address(state->sg),
-                            sg_dma_len(state->sg),
+               unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+
+               srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
+                            sg_dma_len(state->sg) - sg_offset,
                             target->global_mr->rkey);
+               if (sg_offset_p)
+                       *sg_offset_p = 0;
                return 1;
        }
 
@@ -1329,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        rkey = ib_inc_rkey(desc->mr->rkey);
        ib_update_fast_reg_key(desc->mr, rkey);
 
-       n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
-       if (unlikely(n < 0))
+       n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
+                        dev->mr_page_size);
+       if (unlikely(n < 0)) {
+               srp_fr_pool_put(ch->fr_pool, &desc, 1);
+               pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
+                        dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
+                        sg_offset_p ? *sg_offset_p : -1, n);
                return n;
+       }
+
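+       /* ib_map_mr_sg() succeeded, so the MR should cover at least one byte. */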
+       WARN_ON_ONCE(desc->mr->length == 0);
 
        req->reg_cqe.done = srp_reg_mr_err_done;
 
@@ -1353,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
                     desc->mr->length, desc->mr->rkey);
 
        err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-       if (unlikely(err))
+       if (unlikely(err)) {
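+               /*
+                * -ENOMEM from ib_post_send() indicates a full send queue,
+                * which the send queue sizing in srp_create_ch_ib() is meant
+                * to prevent.
+                */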
+               WARN_ON_ONCE(err == -ENOMEM);
                return err;
+       }
 
        return n;
 }
@@ -1411,7 +1436,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
 
        state->pages = req->map_page;
        state->fmr.next = req->fmr_list;
-       state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
+       state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
 
        for_each_sg(scat, sg, count, i) {
                ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1430,9 +1455,11 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
                         struct srp_request *req, struct scatterlist *scat,
                         int count)
 {
+       unsigned int sg_offset = 0;
+
        state->desc = req->indirect_desc;
        state->fr.next = req->fr_list;
-       state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+       state->fr.end = req->fr_list + ch->target->mr_per_cmd;
        state->sg = scat;
 
        if (count == 0)
@@ -1441,7 +1468,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
        while (count) {
                int i, n;
 
-               n = srp_map_finish_fr(state, req, ch, count);
+               n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
                if (unlikely(n < 0))
                        return n;
 
@@ -1506,9 +1533,10 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
                idb_sg->dma_length = idb_sg->length;          /* hack^2 */
 #endif
-               ret = srp_map_finish_fr(&state, req, ch, 1);
+               ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
                if (ret < 0)
                        return ret;
+               WARN_ON_ONCE(ret < 1);
        } else if (dev->use_fmr) {
                state.pages = idb_pages;
                state.pages[0] = (req->indirect_dma_addr &
@@ -1526,6 +1554,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
        return 0;
 }
 
+#if defined(DYNAMIC_DEBUG)
+static void srp_check_mapping(struct srp_map_state *state,
+                             struct srp_rdma_ch *ch, struct srp_request *req,
+                             struct scatterlist *scat, int count)
+{
+       struct srp_device *dev = ch->target->srp_host->srp_dev;
+       struct srp_fr_desc **pfr;
+       u64 desc_len = 0, mr_len = 0;
+       int i;
+
+       for (i = 0; i < state->ndesc; i++)
+               desc_len += be32_to_cpu(req->indirect_desc[i].len);
+       if (dev->use_fast_reg)
+               for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
+                       mr_len += (*pfr)->mr->length;
+       else if (dev->use_fmr)
+               for (i = 0; i < state->nmdesc; i++)
+                       mr_len += be32_to_cpu(req->indirect_desc[i].len);
+       if (desc_len != scsi_bufflen(req->scmnd) ||
+           mr_len > scsi_bufflen(req->scmnd))
+               pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
+                      scsi_bufflen(req->scmnd), desc_len, mr_len,
+                      state->ndesc, state->nmdesc);
+}
+#endif
+
 /**
  * srp_map_data() - map SCSI data buffer onto an SRP request
  * @scmnd: SCSI command to map
@@ -1611,6 +1665,15 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        if (ret < 0)
                goto unmap;
 
+#if defined(DYNAMIC_DEBUG)
+       {
+               DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
+                       "Memory mapping consistency check");
+               if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+                       srp_check_mapping(&state, ch, req, scat, count);
+       }
+#endif
+
        /* We've mapped the request, now pull as much of the indirect
         * descriptor table as we can into the command buffer. If this
         * target is not using an external indirect table, we are
@@ -2575,6 +2638,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
        return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
 }
 
+static int srp_slave_alloc(struct scsi_device *sdev)
+{
+       struct Scsi_Host *shost = sdev->host;
+       struct srp_target_port *target = host_to_target(shost);
+       struct srp_device *srp_dev = target->srp_host->srp_dev;
+       struct ib_device *ibdev = srp_dev->dev;
+
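+       /*
+        * Unless the HCA can register S/G lists that contain gaps
+        * (IB_DEVICE_SG_GAPS_REG), tell the block layer not to build requests
+        * with gaps relative to the HCA page size, since FMR and FR can only
+        * map one HCA page per page list entry.
+        */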
+       if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+               blk_queue_virt_boundary(sdev->request_queue,
+                                       ~srp_dev->mr_page_mask);
+
+       return 0;
+}
+
 static int srp_slave_configure(struct scsi_device *sdev)
 {
        struct Scsi_Host *shost = sdev->host;
@@ -2766,6 +2843,7 @@ static struct scsi_host_template srp_template = {
        .module                         = THIS_MODULE,
        .name                           = "InfiniBand SRP initiator",
        .proc_name                      = DRV_NAME,
+       .slave_alloc                    = srp_slave_alloc,
        .slave_configure                = srp_slave_configure,
        .info                           = srp_target_info,
        .queuecommand                   = srp_queuecommand,
@@ -2840,7 +2918,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
                goto out;
        }
 
-       pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+       pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
                 dev_name(&target->scsi_host->shost_gendev),
                 srp_sdev_count(target->scsi_host));
 
@@ -3172,6 +3250,7 @@ static ssize_t srp_create_target(struct device *dev,
        struct srp_device *srp_dev = host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
        int ret, node_idx, node, cpu, i;
+       unsigned int max_sectors_per_mr, mr_per_cmd = 0;
        bool multich = false;
 
        target_host = scsi_host_alloc(&srp_template,
@@ -3228,8 +3307,33 @@ static ssize_t srp_create_target(struct device *dev,
                target->sg_tablesize = target->cmd_sg_cnt;
        }
 
+       if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
+               /*
+                * FR and FMR can only map one HCA page per entry. If the
+                * start address is not aligned on a HCA page boundary two
+                * entries will be used for the head and the tail although
+                * these two entries combined contain at most one HCA page of
+                * data. Hence the "+ 1" in the calculation below.
+                *
+                * The indirect data buffer descriptor is contiguous so the
+                * memory for that buffer will only be registered if
+                * register_always is true. Hence add one to mr_per_cmd if
+                * register_always has been set.
+                */
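+               /* Convert max_pages_per_mr from HCA pages to 512-byte sectors. */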
+               max_sectors_per_mr = srp_dev->max_pages_per_mr <<
+                                 (ilog2(srp_dev->mr_page_size) - 9);
+               mr_per_cmd = register_always +
+                       (target->scsi_host->max_sectors + 1 +
+                        max_sectors_per_mr - 1) / max_sectors_per_mr;
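+               /*
+                * E.g. with a 4 KB HCA page size and max_pages_per_mr = 256,
+                * each MR covers 1 MB = 2048 sectors, so max_sectors = 1024
+                * gives mr_per_cmd = register_always + 1.
+                */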
+               pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
+                        target->scsi_host->max_sectors,
+                        srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+                        max_sectors_per_mr, mr_per_cmd);
+       }
+
        target_host->sg_tablesize = target->sg_tablesize;
-       target->mr_pool_size = target->scsi_host->can_queue;
+       target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
+       target->mr_per_cmd = mr_per_cmd;
        target->indirect_size = target->sg_tablesize *
                                sizeof (struct srp_direct_buf);
        target->max_iu_len = sizeof (struct srp_cmd) +
@@ -3426,17 +3530,6 @@ static void srp_add_one(struct ib_device *device)
        if (!srp_dev)
                return;
 
-       srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
-                           device->map_phys_fmr && device->unmap_fmr);
-       srp_dev->has_fr = (device->attrs.device_cap_flags &
-                          IB_DEVICE_MEM_MGT_EXTENSIONS);
-       if (!srp_dev->has_fmr && !srp_dev->has_fr)
-               dev_warn(&device->dev, "neither FMR nor FR is supported\n");
-
-       srp_dev->use_fast_reg = (srp_dev->has_fr &&
-                                (!srp_dev->has_fmr || prefer_fr));
-       srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
-
        /*
         * Use the smallest page size supported by the HCA, down to a
         * minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3447,8 +3540,25 @@ static void srp_add_one(struct ib_device *device)
        srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
        max_pages_per_mr        = device->attrs.max_mr_size;
        do_div(max_pages_per_mr, srp_dev->mr_page_size);
+       pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
+                device->attrs.max_mr_size, srp_dev->mr_page_size,
+                max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);
+
+       srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+                           device->map_phys_fmr && device->unmap_fmr);
+       srp_dev->has_fr = (device->attrs.device_cap_flags &
+                          IB_DEVICE_MEM_MGT_EXTENSIONS);
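+       /*
+        * Memory registration is only enabled if never_register has not been
+        * set, the HCA supports FMR or FR and the HCA can register MRs of at
+        * least two HCA pages.
+        */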
+       if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
+               dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+       } else if (!never_register &&
+                  device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+               srp_dev->use_fast_reg = (srp_dev->has_fr &&
+                                        (!srp_dev->has_fmr || prefer_fr));
+               srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
+       }
+
        if (srp_dev->use_fast_reg) {
                srp_dev->max_pages_per_mr =
                        min_t(u32, srp_dev->max_pages_per_mr,
@@ -3468,7 +3578,8 @@ static void srp_add_one(struct ib_device *device)
        if (IS_ERR(srp_dev->pd))
                goto free_dev;
 
-       if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+       if (never_register || !register_always ||
+           (!srp_dev->has_fmr && !srp_dev->has_fr)) {
                srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
                                                   IB_ACCESS_LOCAL_WRITE |
                                                   IB_ACCESS_REMOTE_READ |