/* drivers/infiniband/hw/mlx5/cq.c */
/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
#include "user.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

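/*
 * Return the CQE at index n if it is in software ownership (valid opcode
 * and an owner bit that matches the current pass over the ring), or NULL
 * if hardware has not completed it yet.
 */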
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_FAST_REG_MR:
		return IB_WC_FAST_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

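/* Fill in the work completion for a successful send (requester) CQE. */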
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_BIND_MW:
		wc->opcode    = IB_WC_BIND_MW;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE    = 2,
};

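/*
 * Fill in the work completion for a receive CQE; the WR id is taken from
 * the SRQ (whose WQE is freed here) or from the receive queue tail.
 */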
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(&dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq        = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags    = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode      = IB_WC_RECV;
		wc->wc_flags    = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode      = IB_WC_RECV;
		wc->wc_flags    = IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->slid           = be16_to_cpu(cqe->slid);
	wc->sl             = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp         = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	wc->pkey_index     = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision */
	return 0;
}

static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
	return addr;
}

static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  uint16_t idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}

	return;
}

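/*
 * Walk the send work-request list from the last polled WQE up to the one
 * reported by this CQE, fixing up atomic responses in place, and record
 * the new last_poll position.
 */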
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	int idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(&dev->mdev, &buf->buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

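/*
 * Poll a single CQE.  Returns 0 if a work completion was written to *wc,
 * -EAGAIN if no CQE is in software ownership, or a negative error code.
 * *cur_qp caches the QP of the previous CQE so the QP table is consulted
 * only when the QP number changes.
 */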
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mr *mmr;
	struct mlx5_ib_mr *mr;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
		if (unlikely(!mqp)) {
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
				     cq->mcq.cqn, qpn);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		read_lock(&dev->mdev.priv.mr_table.lock);
		mmr = __mlx5_mr_lookup(&dev->mdev,
				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		if (unlikely(!mmr)) {
			read_unlock(&dev->mdev.priv.mr_table.lock);
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
			return -EINVAL;
		}

		mr = to_mibmr(mmr);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		read_unlock(&dev->mdev.priv.mr_table.lock);
		goto repoll;
	}

	return 0;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; npolled++) {
		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
		    MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));

	return 0;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
			     PAGE_SIZE * 2, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
			  int entries, struct mlx5_create_cq_mbox_in **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd;
	int page_shift;
	int npages;
	int ncont;
	int err;

	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
		return -EFAULT;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}
	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;

	*index = to_mucontext(context)->uuari.uars[0].index;

	return 0;

err_db:
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
	ib_umem_release(cq->buf.umem);
}

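/*
 * Mark every CQE in a kernel CQ buffer as invalid so that the first
 * polling pass finds no entries in software ownership.
 */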
static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    struct mlx5_create_cq_mbox_in **cqb,
			    int *index, int *inlen)
{
	int err;

	err = mlx5_db_alloc(&dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	*cq->mcq.set_ci_db = 0;
	*cq->mcq.arm_db    = 0;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}
	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);

	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	*index = dev->mdev.priv.uuari.uars[0].index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(&dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(&dev->mdev, &cq->db);
}

struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
				int vector, struct ib_ucontext *context,
				struct ib_udata *udata)
{
	struct mlx5_create_cq_mbox_in *cqb = NULL;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq;
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	int cqe_size;
	int irqn;
	int eqn;
	int err;

	if (entries < 0)
		return ERR_PTR(-EINVAL);

	entries = roundup_pow_of_two(entries + 1);
	if (entries > dev->mdev.caps.max_cqes)
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
				     &cqb, &cqe_size, &index, &inlen);
		if (err)
			goto err_create;
	} else {
		/* for now choose 64 bytes till we have a proper interface */
		cqe_size = 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			goto err_create;
	}

	cq->cqe_size = cqe_size;
	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
	err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cqb->ctx.c_eqn = cpu_to_be16(eqn);
	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);

	err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	mlx5_vfree(cqb);
	return &cq->ibcq;

err_cmd:
	mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);

err_cqb:
	mlx5_vfree(cqb);
	if (context)
		destroy_cq_user(cq, context);
	else
		destroy_cq_kernel(dev, cq);

err_create:
	kfree(cq);

	return ERR_PTR(err);
}

int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	struct ib_ucontext *context = NULL;

	if (cq->uobject)
		context = cq->uobject->context;

	mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
	if (context)
		destroy_cq_user(mcq, context);
	else
		destroy_cq_kernel(dev, mcq);

	kfree(mcq);

	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

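/*
 * Remove all CQEs belonging to the QP or SRQ identified by rsn,
 * compacting the remaining entries.  Callers hold cq->lock (see
 * mlx5_ib_cq_clean()).
 */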
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_modify_cq_mbox_in *in;
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;
	u32 fsel;

	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
		return -ENOSYS;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	in->cqn = cpu_to_be32(mcq->mcq.cqn);
	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
	in->ctx.cq_period = cpu_to_be16(cq_period);
	in->ctx.cq_max_count = cpu_to_be16(cq_count);
	in->field_select = cpu_to_be32(fsel);
	err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
	kfree(in);

	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

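/*
 * Pin the user buffer for the resized CQ, record it in cq->resize_umem
 * and report its page layout; the switch-over to the new buffer happens
 * in mlx5_ib_resize_cq().
 */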
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}

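/*
 * Copy CQEs that are still in software ownership from the old buffer to
 * the resize buffer, stopping at the MLX5_CQE_RESIZE_CQ entry written by
 * hardware.  Only resizing between identical CQE sizes is supported.
 */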
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_modify_cq_mbox_in *in;
	int err;
	int npas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1)
		return -EINVAL;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > dev->mdev.caps.max_cqes + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     in->pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);

	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	in->ctx.page_offset = 0;
	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
	in->cqn = cpu_to_be32(cq->mcq.cqn);

	err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	mlx5_vfree(in);
	return 0;

ex_alloc:
	mlx5_vfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}