mlx5: Add driver for Mellanox Connect-IB adapters
author Eli Cohen <eli@mellanox.com>
Sun, 7 Jul 2013 14:25:49 +0000 (17:25 +0300)
committer Roland Dreier <roland@purestorage.com>
Mon, 8 Jul 2013 17:32:24 +0000 (10:32 -0700)
The driver is comprised of two kernel modules: mlx5_ib and mlx5_core.
This partitioning resembles what we have for mlx4, except that mlx5_ib
is the pci device driver and not mlx5_core.

mlx5_core is essentially a library that provides general functionality
that is intended to be used by other Mellanox devices that will be
introduced in the future.  mlx5_ib has a similar role as any hardware
device under drivers/infiniband/hw.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
[ Merge in coccinelle fixes from Fengguang Wu <fengguang.wu@intel.com>.
  - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
45 files changed:
MAINTAINERS
drivers/infiniband/Kconfig
drivers/infiniband/Makefile
drivers/infiniband/hw/mlx5/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/mlx5/Makefile [new file with mode: 0644]
drivers/infiniband/hw/mlx5/ah.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/cq.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/doorbell.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/mad.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/main.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/mem.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/mlx5_ib.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/mr.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/qp.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/srq.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/user.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/Kconfig
drivers/net/ethernet/mellanox/Makefile
drivers/net/ethernet/mellanox/mlx5/core/Kconfig [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/Makefile [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/alloc.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/cmd.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/cq.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/eq.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/fw.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/health.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/mad.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/main.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/mcg.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/mr.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/pd.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/port.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/qp.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/srq.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/uar.c [new file with mode: 0644]
include/linux/mlx5/cmd.h [new file with mode: 0644]
include/linux/mlx5/cq.h [new file with mode: 0644]
include/linux/mlx5/device.h [new file with mode: 0644]
include/linux/mlx5/doorbell.h [new file with mode: 0644]
include/linux/mlx5/driver.h [new file with mode: 0644]
include/linux/mlx5/qp.h [new file with mode: 0644]
include/linux/mlx5/srq.h [new file with mode: 0644]

index 60d6a33..b426536 100644 (file)
@@ -5365,6 +5365,28 @@ W:       http://linuxtv.org
 S:     Odd Fixes
 F:     drivers/media/radio/radio-miropcm20*
 
+Mellanox MLX5 core VPI driver
+M:     Eli Cohen <eli@mellanox.com>
+L:     netdev@vger.kernel.org
+L:     linux-rdma@vger.kernel.org
+W:     http://www.mellanox.com
+Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     http://patchwork.kernel.org/project/linux-rdma/list/
+T:     git://openfabrics.org/~eli/connect-ib.git
+S:     Supported
+F:     drivers/net/ethernet/mellanox/mlx5/core/
+F:     include/linux/mlx5/
+
+Mellanox MLX5 IB driver
+M:      Eli Cohen <eli@mellanox.com>
+L:      linux-rdma@vger.kernel.org
+W:      http://www.mellanox.com
+Q:      http://patchwork.kernel.org/project/linux-rdma/list/
+T:      git://openfabrics.org/~eli/connect-ib.git
+S:      Supported
+F:      include/linux/mlx5/
+F:      drivers/infiniband/hw/mlx5/
+
 MODULE SUPPORT
 M:     Rusty Russell <rusty@rustcorp.com.au>
 S:     Maintained
index c85b56c..5ceda71 100644 (file)
@@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
+source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
 source "drivers/infiniband/hw/ocrdma/Kconfig"
 
index b126fef..1fe6988 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100)       += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)         += hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)         += hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND)          += hw/mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)          += hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES)           += hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)                += hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB)         += ulp/ipoib/
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644 (file)
index 0000000..8e6aebf
--- /dev/null
@@ -0,0 +1,10 @@
+config MLX5_INFINIBAND
+       tristate "Mellanox Connect-IB HCA support"
+       depends on NETDEVICES && ETHERNET && PCI && X86
+       select NET_VENDOR_MELLANOX
+       select MLX5_CORE
+       ---help---
+         This driver provides low-level InfiniBand support for
+         Mellanox Connect-IB PCI Express host channel adapters (HCAs).
+         This is required to use InfiniBand protocols such as
+         IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644 (file)
index 0000000..4ea0135
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX5_INFINIBAND)  += mlx5_ib.o
+
+mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644 (file)
index 0000000..39ab0ca
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx5_ib.h"
+
+/*
+ * Translate the verbs address-handle attributes in @ah_attr into the
+ * address vector embedded in @ah, and return the ib_ah embedded in @ah.
+ *
+ * The GRH-related fields are written only when IB_AH_GRH is set in
+ * ah_flags; bit 30 of grh_gid_fl records that a GRH is present.
+ */
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+                          struct mlx5_ib_ah *ah)
+{
+       /* Fields that are always filled in, GRH or not. */
+       ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+       ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+       ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+
+       if (ah_attr->ah_flags & IB_AH_GRH) {
+               /* flow label | "GRH present" flag | source GID index */
+               u32 gid_fl = ah_attr->grh.flow_label |
+                            (1 << 30) |
+                            ah_attr->grh.sgid_index << 20;
+
+               ah->av.grh_gid_fl = cpu_to_be32(gid_fl);
+               ah->av.hop_limit = ah_attr->grh.hop_limit;
+               ah->av.tclass = ah_attr->grh.traffic_class;
+               memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
+       }
+
+       return &ah->ibah;
+}
+
+/*
+ * Allocate a zeroed mlx5_ib_ah with GFP_ATOMIC and fill it from @ah_attr.
+ * Returns the embedded ib_ah, or ERR_PTR(-ENOMEM) if the allocation fails.
+ * @pd is not used by this function.
+ */
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+       struct mlx5_ib_ah *ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+
+       /* create_ib_ah() only copies fields, so allocation is the sole
+        * failure point here.
+        */
+       return ah ? create_ib_ah(ah_attr, ah) : ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Decode the address vector stored in @ibah back into verbs attributes.
+ *
+ * @ah_attr is zeroed first, so fields this function does not set read as
+ * zero.  GRH fields are reported only when bit 30 of grh_gid_fl (the
+ * "GRH present" flag written by create_ib_ah()) is set.
+ *
+ * Always returns 0.
+ */
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+       struct mlx5_ib_ah *ah = to_mah(ibah);
+       u32 tmp;
+
+       memset(ah_attr, 0, sizeof(*ah_attr));
+
+       tmp = be32_to_cpu(ah->av.grh_gid_fl);
+       if (tmp & (1 << 30)) {
+               ah_attr->ah_flags = IB_AH_GRH;
+               ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
+               ah_attr->grh.flow_label = tmp & 0xfffff;
+               memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
+               ah_attr->grh.hop_limit = ah->av.hop_limit;
+               ah_attr->grh.traffic_class = ah->av.tclass;
+       }
+       ah_attr->dlid = be16_to_cpu(ah->av.rlid);
+       /*
+        * create_ib_ah() keeps the source path bits in the low 7 bits of
+        * fl_mlid; report them back so a query/create round trip does not
+        * silently drop them.
+        */
+       ah_attr->src_path_bits = ah->av.fl_mlid & 0x7f;
+       ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
+       ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+
+       return 0;
+}
+
+/* Free the mlx5_ib_ah that contains @ah.  Always returns 0. */
+int mlx5_ib_destroy_ah(struct ib_ah *ah)
+{
+       struct mlx5_ib_ah *mah = to_mah(ah);
+
+       kfree(mah);
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644 (file)
index 0000000..344ab03
--- /dev/null
@@ -0,0 +1,843 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+/*
+ * Completion callback installed on the core CQ: forwards the notification
+ * to the completion handler registered on the corresponding ib_cq.
+ */
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+{
+       struct ib_cq *ibcq;
+
+       ibcq = &to_mibcq(cq)->ibcq;
+       ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+/*
+ * Asynchronous event callback installed on the core CQ.
+ *
+ * Only MLX5_EVENT_TYPE_CQ_ERROR is expected; any other type is logged and
+ * dropped.  A CQ error is reported to the consumer as IB_EVENT_CQ_ERR,
+ * provided an event handler is registered on the ib_cq.
+ */
+static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
+{
+       struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
+       struct ib_cq *ibcq = &cq->ibcq;
+       struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+       struct ib_event event;
+
+       if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
+               mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
+                            type, mcq->cqn);
+               return;
+       }
+
+       /* Nobody is listening for events on this CQ. */
+       if (!ibcq->event_handler)
+               return;
+
+       event.device     = &dev->ib_dev;
+       event.event      = IB_EVENT_CQ_ERR;
+       event.element.cq = ibcq;
+       ibcq->event_handler(&event, ibcq->cq_context);
+}
+
+/* Return the address of entry @n in @buf, given entries of @size bytes. */
+static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
+{
+       int offset = n * size;
+
+       return mlx5_buf_offset(&buf->buf, offset);
+}
+
+/* Return the address of entry @n in @cq's buffer, using the CQ's CQE size. */
+static void *get_cqe(struct mlx5_ib_cq *cq, int n)
+{
+       struct mlx5_ib_cq_buf *buf = &cq->buf;
+
+       return get_cqe_from_buf(buf, n, cq->mcq.cqe_sz);
+}
+
+/*
+ * Return the CQE for consumer index @n if it may be consumed, else NULL.
+ *
+ * The slot is @n masked with ibcq.cqe; the wrap parity is the bit of @n
+ * just above that mask.  The entry is returned only when the CQE's owner
+ * bit and the wrap parity agree.  With CQEs that are not 64 bytes, the
+ * 64-byte part carrying op_own sits at offset 64 within the entry.
+ */
+static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
+{
+       void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+       struct mlx5_cqe64 *cqe64 = cqe;
+       int mismatch;
+
+       if (cq->mcq.cqe_sz != 64)
+               cqe64 = cqe + 64;
+
+       mismatch = (cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
+                  !!(n & (cq->ibcq.cqe + 1));
+       if (mismatch)
+               return NULL;
+
+       return cqe;
+}
+
+/*
+ * Return the CQE at the CQ's current consumer index if it may be consumed,
+ * or NULL otherwise (see get_sw_cqe()).
+ */
+static void *next_cqe_sw(struct mlx5_ib_cq *cq)
+{
+       void *cqe = get_sw_cqe(cq, cq->mcq.cons_index);
+
+       return cqe;
+}
+
+/*
+ * Map the work-request type recorded in wq->wr_data[@idx] to the verbs
+ * completion opcode for a UMR completion.  Returns 0 for MLX5_IB_WR_UMR
+ * and, after a warning, for any unrecognised value.
+ */
+static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
+{
+       switch (wq->wr_data[idx]) {
+       case IB_WR_FAST_REG_MR:
+               return IB_WC_FAST_REG_MR;
+
+       case IB_WR_LOCAL_INV:
+               return IB_WC_LOCAL_INV;
+
+       case MLX5_IB_WR_UMR:
+               return 0;
+
+       default:
+               break;
+       }
+
+       pr_warn("unknown completion status\n");
+       return 0;
+}
+
+/*
+ * Fill the opcode, flags and (where applicable) byte_len of @wc for a
+ * successful requester completion.
+ *
+ * The send opcode is taken from the top byte of cqe->sop_drop_qpn.
+ * byte_len is written only for RDMA read (from the CQE byte count) and for
+ * the atomic opcodes (fixed at 8).  For an opcode not listed here only
+ * wc_flags is touched.
+ */
+static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+                           struct mlx5_ib_wq *wq, int idx)
+{
+       u32 send_opcode = be32_to_cpu(cqe->sop_drop_qpn) >> 24;
+
+       wc->wc_flags = 0;
+
+       switch (send_opcode) {
+       case MLX5_OPCODE_RDMA_WRITE_IMM:
+               wc->wc_flags |= IB_WC_WITH_IMM;
+               /* fall through */
+       case MLX5_OPCODE_RDMA_WRITE:
+               wc->opcode = IB_WC_RDMA_WRITE;
+               break;
+
+       case MLX5_OPCODE_SEND_IMM:
+               wc->wc_flags |= IB_WC_WITH_IMM;
+               /* fall through */
+       case MLX5_OPCODE_SEND:
+       case MLX5_OPCODE_SEND_INVAL:
+               wc->opcode = IB_WC_SEND;
+               break;
+
+       case MLX5_OPCODE_RDMA_READ:
+               wc->opcode = IB_WC_RDMA_READ;
+               wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+               break;
+
+       case MLX5_OPCODE_ATOMIC_CS:
+               wc->opcode = IB_WC_COMP_SWAP;
+               wc->byte_len = 8;
+               break;
+
+       case MLX5_OPCODE_ATOMIC_FA:
+               wc->opcode = IB_WC_FETCH_ADD;
+               wc->byte_len = 8;
+               break;
+
+       case MLX5_OPCODE_ATOMIC_MASKED_CS:
+               wc->opcode = IB_WC_MASKED_COMP_SWAP;
+               wc->byte_len = 8;
+               break;
+
+       case MLX5_OPCODE_ATOMIC_MASKED_FA:
+               wc->opcode = IB_WC_MASKED_FETCH_ADD;
+               wc->byte_len = 8;
+               break;
+
+       case MLX5_OPCODE_BIND_MW:
+               wc->opcode = IB_WC_BIND_MW;
+               break;
+
+       case MLX5_OPCODE_UMR:
+               wc->opcode = get_umr_comp(wq, idx);
+               break;
+
+       default:
+               /* Unlisted opcode: leave wc->opcode untouched. */
+               break;
+       }
+}
+
+/*
+ * NOTE(review): these constants are not referenced in the visible part of
+ * this file.  They presumably name the values of the 2-bit field that
+ * handle_responder() extracts from bits 28-29 of cqe->flags_rqpn (where
+ * the GRH was delivered) -- confirm against the hardware documentation.
+ */
+enum {
+       MLX5_GRH_IN_BUFFER = 1,
+       MLX5_GRH_IN_CQE    = 2,
+};
+
+/*
+ * Fill @wc for a successful responder (receive side) completion.
+ *
+ * The work-request id comes from one of three places:
+ *  - QP with an XRC domain: the SRQ is looked up by the SRQ number carried
+ *    in the CQE; the reference taken by the lookup is dropped here once
+ *    the entry has been consumed;
+ *  - QP attached to an SRQ: that SRQ's wrid table, indexed by the CQE's
+ *    wqe_counter;
+ *  - otherwise: the QP's own receive queue, whose tail is advanced.
+ *
+ * If the XRC SRQ lookup fails, wc->wr_id is left untouched.
+ *
+ * The opcode switch only handles the four responder opcodes; the caller in
+ * this file dispatches here for exactly those, so wc_flags is always
+ * initialised before IB_WC_GRH is OR-ed in below.
+ */
+static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+                            struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+       struct mlx5_ib_srq *srq = NULL;
+       struct mlx5_ib_wq *wq;
+       u16 wqe_ctr;
+       u8 g;
+
+       if (qp->ibqp.srq || qp->ibqp.xrcd) {
+               struct mlx5_core_srq *msrq = NULL;
+
+               if (qp->ibqp.xrcd) {
+                       msrq = mlx5_core_get_srq(&dev->mdev,
+                                                be32_to_cpu(cqe->srqn));
+                       /*
+                        * Convert only a successful lookup.  Applying the
+                        * container conversion to a NULL msrq need not
+                        * yield NULL, in which case the check below would
+                        * not catch the failure.
+                        */
+                       if (msrq)
+                               srq = to_mibsrq(msrq);
+               } else {
+                       srq = to_msrq(qp->ibqp.srq);
+               }
+               if (srq) {
+                       wqe_ctr = be16_to_cpu(cqe->wqe_counter);
+                       wc->wr_id = srq->wrid[wqe_ctr];
+                       mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+                       /* Drop the reference taken by the lookup above. */
+                       if (msrq && atomic_dec_and_test(&msrq->refcount))
+                               complete(&msrq->free);
+               }
+       } else {
+               wq        = &qp->rq;
+               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+               ++wq->tail;
+       }
+       wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+
+       switch (cqe->op_own >> 4) {
+       case MLX5_CQE_RESP_WR_IMM:
+               wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
+               wc->wc_flags    = IB_WC_WITH_IMM;
+               wc->ex.imm_data = cqe->imm_inval_pkey;
+               break;
+       case MLX5_CQE_RESP_SEND:
+               wc->opcode   = IB_WC_RECV;
+               wc->wc_flags = 0;
+               break;
+       case MLX5_CQE_RESP_SEND_IMM:
+               wc->opcode      = IB_WC_RECV;
+               wc->wc_flags    = IB_WC_WITH_IMM;
+               wc->ex.imm_data = cqe->imm_inval_pkey;
+               break;
+       case MLX5_CQE_RESP_SEND_INV:
+               wc->opcode      = IB_WC_RECV;
+               wc->wc_flags    = IB_WC_WITH_INVALIDATE;
+               wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+               break;
+       }
+       wc->slid           = be16_to_cpu(cqe->slid);
+       wc->sl             = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
+       wc->src_qp         = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
+       wc->dlid_path_bits = cqe->ml_path;
+       /* Bits 28-29 of flags_rqpn: non-zero means a GRH was received. */
+       g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
+       wc->wc_flags |= g ? IB_WC_GRH : 0;
+       wc->pkey_index     = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+}
+
+/*
+ * Log the raw contents of an error CQE, four big-endian 32-bit words per
+ * line, converted to CPU byte order for printing.
+ */
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+{
+       __be32 *word = (__be32 *)cqe;
+       int line;
+
+       mlx5_ib_warn(dev, "dump error cqe\n");
+
+       /* One output line per 16 bytes of CQE. */
+       for (line = 0; line < sizeof(*cqe) / 16; line++) {
+               pr_info("%08x %08x %08x %08x\n", be32_to_cpu(word[0]),
+                       be32_to_cpu(word[1]), be32_to_cpu(word[2]),
+                       be32_to_cpu(word[3]));
+               word += 4;
+       }
+}
+
+/*
+ * Translate the syndrome of an error CQE into a verbs completion status
+ * and record the vendor syndrome in wc->vendor_err.
+ *
+ * The raw CQE is dumped to the log for every syndrome except flush,
+ * transport-retry-exceeded and RNR-retry-exceeded errors.  Unrecognised
+ * syndromes map to IB_WC_GENERAL_ERR.
+ */
+static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
+                                 struct mlx5_err_cqe *cqe,
+                                 struct ib_wc *wc)
+{
+       int quiet = 0;  /* non-zero: do not dump the CQE */
+
+       switch (cqe->syndrome) {
+       case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+               wc->status = IB_WC_LOC_LEN_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+               wc->status = IB_WC_LOC_QP_OP_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+               wc->status = IB_WC_LOC_PROT_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
+               wc->status = IB_WC_WR_FLUSH_ERR;
+               quiet = 1;
+               break;
+       case MLX5_CQE_SYNDROME_MW_BIND_ERR:
+               wc->status = IB_WC_MW_BIND_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
+               wc->status = IB_WC_BAD_RESP_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+               wc->status = IB_WC_LOC_ACCESS_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+               wc->status = IB_WC_REM_INV_REQ_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+               wc->status = IB_WC_REM_ACCESS_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+               wc->status = IB_WC_REM_OP_ERR;
+               break;
+       case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+               wc->status = IB_WC_RETRY_EXC_ERR;
+               quiet = 1;
+               break;
+       case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+               wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+               quiet = 1;
+               break;
+       case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+               wc->status = IB_WC_REM_ABORT_ERR;
+               break;
+       default:
+               wc->status = IB_WC_GENERAL_ERR;
+               break;
+       }
+
+       wc->vendor_err = cqe->vendor_err_synd;
+
+       if (!quiet)
+               dump_cqe(dev, cqe);
+}
+
+/*
+ * Placeholder: always returns 0, so handle_atomic() currently returns
+ * before touching any buffer.  Neither @qp nor @idx is examined.
+ */
+static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+       /* TBD: waiting decision
+       */
+       return 0;
+}
+
+/*
+ * Return the address stored in the data segment of the send WQE at @idx.
+ * The data segment is located after the control, remote-address and atomic
+ * segments.  The 64-bit big-endian address found there is converted to a
+ * pointer via unsigned long.
+ */
+static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+       struct mlx5_wqe_data_seg *dpseg;
+
+       dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
+               sizeof(struct mlx5_wqe_raddr_seg) +
+               sizeof(struct mlx5_wqe_atomic_seg);
+
+       return (void *)(unsigned long)be64_to_cpu(dpseg->addr);
+}
+
+/*
+ * Byte-swap, in place, the data at the local address of the send WQE at
+ * @idx from big-endian to CPU order: a single 32-bit word when the CQE
+ * byte count is 4, otherwise 64-bit words covering the byte count.
+ *
+ * Does nothing unless is_atomic_response() reports an atomic response.
+ */
+static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+                         uint16_t idx)
+{
+       int byte_count;
+       void *addr;
+       int done;
+
+       if (!is_atomic_response(qp, idx))
+               return;
+
+       byte_count = be32_to_cpu(cqe64->byte_cnt);
+       addr = mlx5_get_atomic_laddr(qp, idx);
+
+       if (byte_count == 4) {
+               *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
+               return;
+       }
+
+       for (done = 0; done < byte_count; done += 8) {
+               *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
+               addr += 8;
+       }
+}
+
+/*
+ * Walk the send queue's w_list chain from @tail until the entry whose
+ * index equals @head has been processed, calling handle_atomic() on each
+ * entry, then record the successor of the last entry in sq.last_poll.
+ */
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+                          u16 tail, u16 head)
+{
+       int idx;
+
+       for (;;) {
+               idx = tail & (qp->sq.wqe_cnt - 1);
+               handle_atomic(qp, cqe64, idx);
+               if (idx == head)
+                       break;
+
+               tail = qp->sq.w_list[idx].next;
+       }
+
+       /* Polling resumes from the entry after @head next time. */
+       tail = qp->sq.w_list[idx].next;
+       qp->sq.last_poll = tail;
+}
+
+/*
+ * Consume at most one CQE from @cq and translate it into @wc.
+ *
+ * @cur_qp caches the QP of the previously polled CQE; it is refreshed via
+ * a table lookup only when the QP number in the new CQE differs.
+ *
+ * Returns -EAGAIN when no consumable CQE is available, -EINVAL when the
+ * CQE names a QP number that cannot be found (the CQE has already been
+ * consumed at that point, since cons_index is advanced first), and 0
+ * otherwise.
+ *
+ * NOTE(review): for MLX5_CQE_RESIZE_CQ, and for any opcode not listed in
+ * the switch, 0 is returned with only wc->qp filled in.
+ */
+static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+                        struct mlx5_ib_qp **cur_qp,
+                        struct ib_wc *wc)
+{
+       struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+       struct mlx5_err_cqe *err_cqe;
+       struct mlx5_cqe64 *cqe64;
+       struct mlx5_core_qp *mqp;
+       struct mlx5_ib_wq *wq;
+       uint8_t opcode;
+       uint32_t qpn;
+       u16 wqe_ctr;
+       void *cqe;
+       int idx;
+
+       cqe = next_cqe_sw(cq);
+       if (!cqe)
+               return -EAGAIN;
+
+       /* With CQEs that are not 64 bytes, the 64-byte part parsed here
+        * sits at offset 64 within the entry.
+        */
+       cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
+       ++cq->mcq.cons_index;
+
+       /* Make sure we read CQ entry contents after we've checked the
+        * ownership bit.
+        */
+       rmb();
+
+       /* TBD: resize CQ */
+
+       /* Low 24 bits of sop_drop_qpn carry the QP number. */
+       qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
+       if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+               /* We do not have to take the QP table lock here,
+                * because CQs will be locked while QPs are removed
+                * from the table.
+                */
+               mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
+               if (unlikely(!mqp)) {
+                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
+                                    cq->mcq.cqn, qpn);
+                       return -EINVAL;
+               }
+
+               *cur_qp = to_mibqp(mqp);
+       }
+
+       wc->qp  = &(*cur_qp)->ibqp;
+       /* High nibble of op_own is the CQE opcode. */
+       opcode = cqe64->op_own >> 4;
+       switch (opcode) {
+       case MLX5_CQE_REQ:
+               /* Successful send-side completion. */
+               wq = &(*cur_qp)->sq;
+               wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+               idx = wqe_ctr & (wq->wqe_cnt - 1);
+               handle_good_req(wc, cqe64, wq, idx);
+               handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
+               wc->wr_id = wq->wrid[idx];
+               wq->tail = wq->wqe_head[idx] + 1;
+               wc->status = IB_WC_SUCCESS;
+               break;
+       case MLX5_CQE_RESP_WR_IMM:
+       case MLX5_CQE_RESP_SEND:
+       case MLX5_CQE_RESP_SEND_IMM:
+       case MLX5_CQE_RESP_SEND_INV:
+               /* Successful receive-side completion. */
+               handle_responder(wc, cqe64, *cur_qp);
+               wc->status = IB_WC_SUCCESS;
+               break;
+       case MLX5_CQE_RESIZE_CQ:
+               break;
+       case MLX5_CQE_REQ_ERR:
+       case MLX5_CQE_RESP_ERR:
+               err_cqe = (struct mlx5_err_cqe *)cqe64;
+               mlx5_handle_error_cqe(dev, err_cqe, wc);
+               mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
+                           opcode == MLX5_CQE_REQ_ERR ?
+                           "Requestor" : "Responder", cq->mcq.cqn);
+               mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
+                           err_cqe->syndrome, err_cqe->vendor_err_synd);
+               if (opcode == MLX5_CQE_REQ_ERR) {
+                       wq = &(*cur_qp)->sq;
+                       wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+                       idx = wqe_ctr & (wq->wqe_cnt - 1);
+                       wc->wr_id = wq->wrid[idx];
+                       wq->tail = wq->wqe_head[idx] + 1;
+               } else {
+                       struct mlx5_ib_srq *srq;
+
+                       /* NOTE(review): unlike handle_responder(), this
+                        * path only checks ibqp.srq and does not look at
+                        * ibqp.xrcd -- confirm this is intended.
+                        */
+                       if ((*cur_qp)->ibqp.srq) {
+                               srq = to_msrq((*cur_qp)->ibqp.srq);
+                               wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+                               wc->wr_id = srq->wrid[wqe_ctr];
+                               mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+                       } else {
+                               wq = &(*cur_qp)->rq;
+                               wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+                               ++wq->tail;
+                       }
+               }
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Poll up to @num_entries completions from @ibcq into the @wc array.
+ *
+ * Runs under the CQ spinlock with interrupts disabled.  The consumer index
+ * is published via mlx5_cq_set_ci() if at least one entry was polled.
+ *
+ * Returns the number of completions written to @wc.  Running out of CQEs
+ * (-EAGAIN from mlx5_poll_one()) is not an error; any other failure is
+ * returned as a negative errno, even if some entries were polled first.
+ */
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+       struct mlx5_ib_cq *cq = to_mcq(ibcq);
+       struct mlx5_ib_qp *cur_qp = NULL;
+       unsigned long flags;
+       int npolled = 0;
+       int err = 0;
+
+       spin_lock_irqsave(&cq->lock, flags);
+
+       while (npolled < num_entries) {
+               err = mlx5_poll_one(cq, &cur_qp, &wc[npolled]);
+               if (err)
+                       break;
+               npolled++;
+       }
+
+       if (npolled)
+               mlx5_cq_set_ci(&cq->mcq);
+
+       spin_unlock_irqrestore(&cq->lock, flags);
+
+       if (err && err != -EAGAIN)
+               return err;
+
+       return npolled;
+}
+
+/*
+ * Request a completion notification on @ibcq.
+ *
+ * A solicited-only request (IB_CQ_SOLICITED) arms the CQ with
+ * MLX5_CQ_DB_REQ_NOT_SOL; anything else uses MLX5_CQ_DB_REQ_NOT.  The
+ * doorbell is rung through the device's first UAR.  Always returns 0.
+ */
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+       struct mlx5_ib_cq *cq = to_mcq(ibcq);
+       int solicited_only;
+
+       solicited_only = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED;
+
+       mlx5_cq_arm(&cq->mcq,
+                   solicited_only ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
+                   dev->mdev.priv.uuari.uars[0].map,
+                   MLX5_GET_DOORBELL_LOCK(&dev->mdev.priv.cq_uar_lock));
+
+       return 0;
+}
+
+/*
+ * Allocate a kernel CQ buffer holding @nent entries of @cqe_size bytes and
+ * record the entry size in @buf.  Returns 0 on success or the error from
+ * mlx5_buf_alloc().
+ */
+static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+                       int nent, int cqe_size)
+{
+       int err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
+                                PAGE_SIZE * 2, &buf->buf);
+
+       if (!err)
+               buf->cqe_size = cqe_size;
+
+       return err;
+}
+
+/* Release a CQ buffer previously obtained with alloc_cq_buf(). */
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+       mlx5_buf_free(&dev->mdev, &buf->buf);
+}
+
+/*
+ * Set up the user-space side of a CQ.
+ *
+ * Reads the creation command from @udata, pins the user CQ buffer, maps
+ * the user doorbell record, and allocates and fills the firmware mailbox
+ * (*@cqb) with the buffer's page list.
+ *
+ * Outputs: *@cqe_size (64 or 128, as requested by user space), *@index
+ * (UAR index taken from the user context), *@inlen (mailbox size in
+ * bytes) and *@cqb (mailbox, owned by the caller on success).
+ *
+ * Returns 0 on success; -EFAULT if the command cannot be copied, -EINVAL
+ * for an unsupported CQE size, -ENOMEM if the mailbox cannot be allocated,
+ * or the error from pinning the buffer / mapping the doorbell.  On failure
+ * everything acquired here is released again.
+ */
+static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+                         struct ib_ucontext *context, struct mlx5_ib_cq *cq,
+                         int entries, struct mlx5_create_cq_mbox_in **cqb,
+                         int *cqe_size, int *index, int *inlen)
+{
+       struct mlx5_create_cq_mbox_in *in;
+       struct mlx5_ib_create_cq ucmd;
+       int page_shift;
+       int npages;
+       int ncont;
+       int err;
+
+       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+               return -EFAULT;
+
+       if (!(ucmd.cqe_size == 64 || ucmd.cqe_size == 128))
+               return -EINVAL;
+
+       *cqe_size = ucmd.cqe_size;
+
+       cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
+                                  entries * ucmd.cqe_size,
+                                  IB_ACCESS_LOCAL_WRITE, 1);
+       if (IS_ERR(cq->buf.umem))
+               return PTR_ERR(cq->buf.umem);
+
+       err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+                                 &cq->db);
+       if (err)
+               goto err_umem;
+
+       mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+                          &ncont, NULL);
+       mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
+                   ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+
+       /* Mailbox header followed by one page address per entry. */
+       *inlen = sizeof(*in) + sizeof(*in->pas) * ncont;
+       in = mlx5_vzalloc(*inlen);
+       *cqb = in;
+       if (!in) {
+               err = -ENOMEM;
+               goto err_db;
+       }
+
+       mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, in->pas, 0);
+       in->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+
+       *index = to_mucontext(context)->uuari.uars[0].index;
+
+       return 0;
+
+err_db:
+       mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+
+err_umem:
+       ib_umem_release(cq->buf.umem);
+       return err;
+}
+
+/*
+ * Undo create_cq_user(): unmap the user doorbell record, then release the
+ * pinned user buffer.  The mailbox is not touched here.
+ */
+static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+{
+       mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+       ib_umem_release(cq->buf.umem);
+}
+
+/*
+ * Stamp op_own = 0xf1 into each of the first @nent CQEs of @cq's buffer.
+ * With CQEs that are not 64 bytes, the stamped 64-byte part is the one at
+ * offset 64 of the entry.
+ *
+ * NOTE(review): 0xf1 presumably encodes opcode 0xf (op_own >> 4 is used as
+ * the opcode elsewhere in this file) with the owner bit set, i.e. an entry
+ * not yet written by hardware -- confirm against the device documentation.
+ */
+static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
+{
+       int i;
+
+       for (i = 0; i < nent; i++) {
+               void *cqe = get_cqe(cq, i);
+               struct mlx5_cqe64 *cqe64 = cqe;
+
+               if (cq->buf.cqe_size != 64)
+                       cqe64 = cqe + 64;
+
+               cqe64->op_own = 0xf1;
+       }
+}
+
+/*
+ * Set up the kernel side of a CQ.
+ *
+ * Allocates the doorbell record (consumer-index word followed by the arm
+ * word, both cleared), allocates and initialises the CQ buffer, and
+ * allocates and fills the firmware mailbox (*@cqb) with the buffer's page
+ * list.
+ *
+ * Outputs: *@index (index of the device's first UAR), *@inlen (mailbox
+ * size in bytes) and *@cqb (mailbox, owned by the caller on success).
+ *
+ * Returns 0 on success, -ENOMEM if the mailbox cannot be allocated, or the
+ * error from the doorbell / buffer allocation.  On failure everything
+ * acquired here is released again.
+ */
+static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+                           int entries, int cqe_size,
+                           struct mlx5_create_cq_mbox_in **cqb,
+                           int *index, int *inlen)
+{
+       struct mlx5_create_cq_mbox_in *in;
+       int err;
+
+       err = mlx5_db_alloc(&dev->mdev, &cq->db);
+       if (err)
+               return err;
+
+       cq->mcq.cqe_sz     = cqe_size;
+       cq->mcq.set_ci_db  = cq->db.db;
+       cq->mcq.arm_db     = cq->db.db + 1;
+       *cq->mcq.set_ci_db = 0;
+       *cq->mcq.arm_db    = 0;
+
+       err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+       if (err)
+               goto err_db;
+
+       init_cq_buf(cq, entries);
+
+       /* Mailbox header followed by one page address per buffer page. */
+       *inlen = sizeof(*in) + sizeof(*in->pas) * cq->buf.buf.npages;
+       in = mlx5_vzalloc(*inlen);
+       *cqb = in;
+       if (!in) {
+               err = -ENOMEM;
+               goto err_buf;
+       }
+
+       mlx5_fill_page_array(&cq->buf.buf, in->pas);
+       in->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+
+       *index = dev->mdev.priv.uuari.uars[0].index;
+
+       return 0;
+
+err_buf:
+       free_cq_buf(dev, &cq->buf);
+
+err_db:
+       mlx5_db_free(&dev->mdev, &cq->db);
+       return err;
+}
+
+/*
+ * Undo create_cq_kernel(): free the CQ buffer, then the doorbell record.
+ * The mailbox is not touched here.
+ */
+static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+       free_cq_buf(dev, &cq->buf);
+       mlx5_db_free(&dev->mdev, &cq->db);
+}
+
+/*
+ * Create a completion queue able to hold at least @entries CQEs.
+ *
+ * @ibdev:   IB device the CQ is created on
+ * @entries: requested number of CQEs; the queue is sized to
+ *           roundup_pow_of_two(entries + 1) and ibcq.cqe reports one less
+ * @vector:  completion vector, translated to an EQ by mlx5_vector2eqn()
+ * @context: user context for a userspace CQ, NULL for a kernel CQ
+ * @udata:   user data; on success the CQ number is copied back through it
+ *           when @context is set
+ *
+ * Returns the new ib_cq, or an ERR_PTR: -EINVAL for an out-of-range
+ * @entries, -ENOMEM on allocation failure, -EFAULT if the CQ number
+ * cannot be copied to userspace, or the error of a failing helper.
+ */
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+                               int vector, struct ib_ucontext *context,
+                               struct ib_udata *udata)
+{
+       struct mlx5_create_cq_mbox_in *cqb = NULL;
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_cq *cq;
+       int uninitialized_var(index);
+       int uninitialized_var(inlen);
+       int cqe_size;
+       int irqn;
+       int eqn;
+       int err;
+
+       /* Validate before rounding: roundup_pow_of_two() is undefined for
+        * zero and entries + 1 must not overflow.
+        */
+       if (entries < 0 || entries > dev->mdev.caps.max_cqes)
+               return ERR_PTR(-EINVAL);
+
+       entries = roundup_pow_of_two(entries + 1);
+       if (entries > dev->mdev.caps.max_cqes)
+               return ERR_PTR(-EINVAL);
+
+       cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+       if (!cq)
+               return ERR_PTR(-ENOMEM);
+
+       cq->ibcq.cqe = entries - 1;
+       mutex_init(&cq->resize_mutex);
+       spin_lock_init(&cq->lock);
+       cq->resize_buf = NULL;
+       cq->resize_umem = NULL;
+
+       if (context) {
+               err = create_cq_user(dev, udata, context, cq, entries,
+                                    &cqb, &cqe_size, &index, &inlen);
+               if (err)
+                       goto err_create;
+       } else {
+               /* for now choose 64 bytes till we have a proper interface */
+               cqe_size = 64;
+               err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
+                                      &index, &inlen);
+               if (err)
+                       goto err_create;
+       }
+
+       /* Fill in the CQ context of the mailbox built by the helper above */
+       cq->cqe_size = cqe_size;
+       cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+       cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
+       err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
+       if (err)
+               goto err_cqb;
+
+       cqb->ctx.c_eqn = cpu_to_be16(eqn);
+       cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+
+       err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
+       if (err)
+               goto err_cqb;
+
+       mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
+       cq->mcq.irqn = irqn;
+       cq->mcq.comp  = mlx5_ib_cq_comp;
+       cq->mcq.event = mlx5_ib_cq_event;
+
+       if (context) {
+               if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
+                       err = -EFAULT;
+                       goto err_cmd;
+               }
+       }
+
+       /* The mailbox is only needed for the create command */
+       mlx5_vfree(cqb);
+       return &cq->ibcq;
+
+err_cmd:
+       mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
+
+err_cqb:
+       mlx5_vfree(cqb);
+       if (context)
+               destroy_cq_user(cq, context);
+       else
+               destroy_cq_kernel(dev, cq);
+
+err_create:
+       kfree(cq);
+
+       return ERR_PTR(err);
+}
+
+
+/*
+ * Destroy a CQ: tear down the core CQ, release the user or kernel
+ * resources depending on whether the CQ has a uobject, then free the
+ * mlx5_ib_cq itself.  Always returns 0.
+ */
+int mlx5_ib_destroy_cq(struct ib_cq *cq)
+{
+       struct ib_ucontext *uctx = cq->uobject ? cq->uobject->context : NULL;
+       struct mlx5_ib_dev *dev = to_mdev(cq->device);
+       struct mlx5_ib_cq *mcq = to_mcq(cq);
+
+       mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
+
+       if (!uctx)
+               destroy_cq_kernel(dev, mcq);
+       else
+               destroy_cq_user(mcq, uctx);
+
+       kfree(mcq);
+       return 0;
+}
+
+/*
+ * Return non-zero when the low 24 bits of the CQE's resource number
+ * equal rsn.  The number is taken from cqe64->srqn when srq is
+ * non-NULL and from cqe64->sop_drop_qpn otherwise.
+ */
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
+                       u32 rsn)
+{
+       u32 cqe_rsn = srq ? be32_to_cpu(cqe64->srqn) :
+                           be32_to_cpu(cqe64->sop_drop_qpn);
+
+       return (cqe_rsn & 0xffffff) == rsn;
+}
+
+/*
+ * Remove from the CQ every pending CQE whose resource number matches
+ * rsn (see is_equal_rsn() for which CQE field is compared).  Matching
+ * entries are squeezed out by copying older entries over them, and the
+ * consumer index is advanced by the number of entries removed.  When
+ * srq is non-NULL the WQE of each removed entry is returned to the SRQ.
+ *
+ * A NULL cq is a no-op.  mlx5_ib_cq_clean() calls this with cq->lock
+ * held.
+ */
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+{
+       struct mlx5_cqe64 *cqe64, *dest64;
+       void *cqe, *dest;
+       u32 prod_index;
+       int nfreed = 0;
+       u8 owner_bit;
+
+       if (!cq)
+               return;
+
+       /* First we need to find the current producer index, so we
+        * know where to start cleaning from.  It doesn't matter if HW
+        * adds new entries after this loop -- the QP we're worried
+        * about is already in RESET, so the new entries won't come
+        * from our QP and therefore don't need to be checked.
+        */
+       for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
+               if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
+                       break;
+
+       /* Now sweep backwards through the CQ, removing CQ entries
+        * that match our QP by copying older entries on top of them.
+        */
+       while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+               /* ibcq.cqe is used as the index mask here */
+               cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+               cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+               if (is_equal_rsn(cqe64, srq, rsn)) {
+                       if (srq)
+                               mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
+                       ++nfreed;
+               } else if (nfreed) {
+                       /* Move the surviving entry nfreed slots forward but
+                        * keep the owner bit the destination slot had.
+                        */
+                       dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+                       dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
+                       owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
+                       memcpy(dest, cqe, cq->mcq.cqe_sz);
+                       dest64->op_own = owner_bit |
+                               (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
+               }
+       }
+
+       if (nfreed) {
+               cq->mcq.cons_index += nfreed;
+               /* Make sure update of buffer contents is done before
+                * updating consumer index.
+                */
+               wmb();
+               mlx5_cq_set_ci(&cq->mcq);
+       }
+}
+
+/*
+ * Locked wrapper around __mlx5_ib_cq_clean(): takes cq->lock with
+ * interrupts disabled for the duration of the clean.  A NULL cq is a
+ * no-op.
+ */
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
+{
+       if (cq) {
+               spin_lock_irq(&cq->lock);
+               __mlx5_ib_cq_clean(cq, qpn, srq);
+               spin_unlock_irq(&cq->lock);
+       }
+}
+
+/* Not implemented: ignores its arguments and always returns -ENOSYS. */
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+       return -ENOSYS;
+}
+
+/* Not implemented: ignores its arguments and always returns -ENOSYS. */
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+       return -ENOSYS;
+}
+
+/*
+ * Return the CQE size in bytes recorded for ibcq, or 128 when ibcq is
+ * NULL.  dev is not used.
+ */
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+{
+       return ibcq ? to_mcq(ibcq)->cqe_size : 128;
+}
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644 (file)
index 0000000..256a233
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+
+#include "mlx5_ib.h"
+
+/*
+ * One user page holding doorbell records, shared by every doorbell
+ * mapped at an address inside that page.
+ */
+struct mlx5_ib_user_db_page {
+       struct list_head        list;           /* entry in context->db_page_list */
+       struct ib_umem         *umem;           /* umem from ib_umem_get() for the page */
+       unsigned long           user_virt;      /* page-aligned user virtual address */
+       int                     refcnt;         /* doorbells currently mapped in this page */
+};
+
+/*
+ * Map the user doorbell record at user virtual address virt.
+ *
+ * The page containing virt is looked up in context->db_page_list; if it
+ * is not there yet, a umem for that page is obtained with ib_umem_get()
+ * and a new entry is added to the list.  db->dma is set to the DMA
+ * address of the page plus the offset of virt within it, and the page
+ * reference count is incremented.
+ *
+ * Returns 0 on success, -ENOMEM if the page entry cannot be allocated,
+ * or the error reported by ib_umem_get().  Serialized by
+ * context->db_page_mutex.
+ */
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+                       struct mlx5_db *db)
+{
+       struct mlx5_ib_user_db_page *page;
+       struct ib_umem_chunk *chunk;
+       int err = 0;
+
+       mutex_lock(&context->db_page_mutex);
+
+       /* Reuse an existing entry when the page is already mapped */
+       list_for_each_entry(page, &context->db_page_list, list)
+               if (page->user_virt == (virt & PAGE_MASK))
+                       goto found;
+
+       page = kmalloc(sizeof(*page), GFP_KERNEL);
+       if (!page) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       page->user_virt = (virt & PAGE_MASK);
+       page->refcnt    = 0;
+       page->umem      = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+                                     PAGE_SIZE, 0, 0);
+       if (IS_ERR(page->umem)) {
+               err = PTR_ERR(page->umem);
+               kfree(page);
+               goto out;
+       }
+
+       list_add(&page->list, &context->db_page_list);
+
+found:
+       /* The umem covers a single page, so use its first chunk's first entry */
+       chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
+       db->dma         = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+       db->u.user_page = page;
+       ++page->refcnt;
+
+out:
+       mutex_unlock(&context->db_page_mutex);
+
+       return err;
+}
+
+/*
+ * Drop one reference on the user doorbell page behind db.  When the
+ * last reference goes away the page is unlinked from the context's
+ * list, its umem is released and the entry is freed.  Serialized by
+ * context->db_page_mutex.
+ */
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
+{
+       struct mlx5_ib_user_db_page *page;
+
+       mutex_lock(&context->db_page_mutex);
+
+       page = db->u.user_page;
+       page->refcnt--;
+       if (page->refcnt == 0) {
+               list_del(&page->list);
+               ib_umem_release(page->umem);
+               kfree(page);
+       }
+
+       mutex_unlock(&context->db_page_mutex);
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644 (file)
index 0000000..5c8938b
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include "mlx5_ib.h"
+
+/*
+ * Vendor management class values; mlx5_ib_process_mad() forwards GET
+ * and SET MADs of these classes to mlx5_MAD_IFC().
+ */
+enum {
+       MLX5_IB_VENDOR_CLASS1 = 0x9,
+       MLX5_IB_VENDOR_CLASS2 = 0xa
+};
+
+/*
+ * Pass in_mad to mlx5_core_mad_ifc() for the given port and return its
+ * result; the response is written to response_mad.
+ *
+ * Bit 0x1 of the op modifier is set when ignore_mkey is non-zero and
+ * bit 0x2 when ignore_bkey is non-zero.  Both bits are forced on when
+ * in_wc is NULL, because key check traps can't be generated unless
+ * in_wc tells us where to send the trap.  in_grh is not used.
+ */
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+                int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+                void *in_mad, void *response_mad)
+{
+       u8 op_modifier = 0;
+
+       if (!in_wc) {
+               op_modifier = 0x1 | 0x2;
+       } else {
+               if (ignore_mkey)
+                       op_modifier |= 0x1;
+               if (ignore_bkey)
+                       op_modifier |= 0x2;
+       }
+
+       return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
+}
+
+/*
+ * Process an incoming MAD.  MADs that pass the class/method filter
+ * below are handed to mlx5_MAD_IFC() and the response is placed in
+ * out_mad.
+ *
+ * Returns a combination of IB_MAD_RESULT_* flags:
+ *  - SUCCESS alone when the MAD is not handled here,
+ *  - SUCCESS | CONSUMED for traps with a zero source LID and for
+ *    TRAP_REPRESS (no response is sent),
+ *  - SUCCESS | REPLY when out_mad holds a response,
+ *  - FAILURE when mlx5_MAD_IFC() reports an error.
+ */
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+                       struct ib_wc *in_wc, struct ib_grh *in_grh,
+                       struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+       u16 slid;
+       int err;
+
+       /* Without a work completion, use the permissive LID as source */
+       slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+       if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
+               return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+       if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+           in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+               /* Subnet management: only GET, SET and TRAP_REPRESS are forwarded */
+               if (in_mad->mad_hdr.method   != IB_MGMT_METHOD_GET &&
+                   in_mad->mad_hdr.method   != IB_MGMT_METHOD_SET &&
+                   in_mad->mad_hdr.method   != IB_MGMT_METHOD_TRAP_REPRESS)
+                       return IB_MAD_RESULT_SUCCESS;
+
+               /* Don't process SMInfo queries -- the SMA can't handle them.
+                */
+               if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+                       return IB_MAD_RESULT_SUCCESS;
+       } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+                  in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1   ||
+                  in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2   ||
+                  in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
+               /* These classes: only GET and SET are forwarded */
+               if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
+                   in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
+                       return IB_MAD_RESULT_SUCCESS;
+       } else {
+               /* Any other management class is not handled here */
+               return IB_MAD_RESULT_SUCCESS;
+       }
+
+       err = mlx5_MAD_IFC(to_mdev(ibdev),
+                          mad_flags & IB_MAD_IGNORE_MKEY,
+                          mad_flags & IB_MAD_IGNORE_BKEY,
+                          port_num, in_wc, in_grh, in_mad, out_mad);
+       if (err)
+               return IB_MAD_RESULT_FAILURE;
+
+       /* set return bit in status of directed route responses */
+       if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+               out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+       if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+               /* no response for trap repress */
+               return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+       return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+/*
+ * Query MLX5_ATTR_EXTENDED_PORT_INFO for @port and record in
+ * dev->mdev.caps.ext_port_cap[port - 1] whether the attribute is
+ * supported: MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO when the query
+ * succeeds with a zero MAD status, 0 otherwise.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * from mlx5_MAD_IFC().
+ */
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+{
+       struct ib_smp *in_mad  = NULL;
+       struct ib_smp *out_mad = NULL;
+       int err = -ENOMEM;
+       u32 ext_cap = 0;
+
+       in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+       out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+       if (!in_mad || !out_mad)
+               goto out;
+
+       init_query_mad(in_mad);
+       in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+       in_mad->attr_mod = cpu_to_be32(port);
+
+       /* NOTE(review): the MAD is always issued on port 1 while attr_mod
+        * carries @port -- confirm this is intended.
+        */
+       err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+
+       /* out_mad comes from kmalloc() and may not have been written when
+        * the command failed, so look at its status only on success.
+        */
+       if (!err && !be16_to_cpu(out_mad->status))
+               ext_cap = MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO;
+
+       dev->mdev.caps.ext_port_cap[port - 1] = ext_cap;
+
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+       return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644 (file)
index 0000000..6b1007f
--- /dev/null
@@ -0,0 +1,1504 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/sched.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include "user.h"
+#include "mlx5_ib.h"
+
+#define DRIVER_NAME "mlx5_ib"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_RELDATE "June 2013"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+/*
+ * Read-only (0444) module parameter, default 2.
+ * NOTE(review): presumably an index into profile[] below -- confirm at
+ * the point of use.
+ */
+static int prof_sel = 2;
+module_param_named(prof_sel, prof_sel, int, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+/* Version banner built from the DRIVER_* macros above */
+static char mlx5_version[] =
+       DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
+       DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+
+/*
+ * Device resource profiles.
+ *   [0] sets no mask bits, so it overrides nothing.
+ *   [1] overrides the QP table size only (log_max_qp = 12).
+ *   [2] overrides the QP table size (log_max_qp = 17) and configures 16
+ *       MR cache entries: entries 0-11 with size 500 / limit 250, then
+ *       64/32, 32/16, 16/8 and 8/4 for entries 12-15.
+ */
+struct mlx5_profile profile[] = {
+       [0] = {
+               .mask           = 0,
+       },
+       [1] = {
+               .mask           = MLX5_PROF_MASK_QP_SIZE,
+               .log_max_qp     = 12,
+       },
+       [2] = {
+               .mask           = MLX5_PROF_MASK_QP_SIZE |
+                                 MLX5_PROF_MASK_MR_CACHE,
+               .log_max_qp     = 17,
+               .mr_cache[0]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[1]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[2]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[3]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[4]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[5]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[6]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[7]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[8]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[9]    = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[10]   = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[11]   = {
+                       .size   = 500,
+                       .limit  = 250
+               },
+               .mr_cache[12]   = {
+                       .size   = 64,
+                       .limit  = 32
+               },
+               .mr_cache[13]   = {
+                       .size   = 32,
+                       .limit  = 16
+               },
+               .mr_cache[14]   = {
+                       .size   = 16,
+                       .limit  = 8
+               },
+               .mr_cache[15]   = {
+                       .size   = 8,
+                       .limit  = 4
+               },
+       },
+};
+
+/*
+ * Look up the completion EQ whose index equals @vector on
+ * dev->eqs_list and return its EQ number and IRQ number through @eqn
+ * and @irqn.
+ *
+ * Returns 0 when a matching EQ is found, -ENOENT otherwise (in which
+ * case *eqn and *irqn are left untouched).  The list is walked under
+ * the EQ table lock.
+ */
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
+{
+       struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+       struct mlx5_eq *eq;
+       int err = -ENOENT;
+
+       spin_lock(&table->lock);
+       /* Nothing is removed while walking, so the plain iterator suffices */
+       list_for_each_entry(eq, &dev->eqs_list, list) {
+               if (eq->index == vector) {
+                       *eqn = eq->eqn;
+                       *irqn = eq->irqn;
+                       err = 0;
+                       break;
+               }
+       }
+       spin_unlock(&table->lock);
+
+       return err;
+}
+
+/*
+ * Create one completion EQ for each of the table's num_comp_vectors
+ * and queue them on dev->eqs_list, each named "mlx5_comp<i>" and
+ * carrying index i.  On success dev->num_comp_vectors is set and 0 is
+ * returned.  On failure every EQ created so far is destroyed and freed
+ * and the error is returned (-ENOMEM for an allocation failure).
+ */
+static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+       struct mlx5_eq *eq, *n;
+       int ncomp_vec;
+       int nent;
+       int err;
+       int i;
+
+       INIT_LIST_HEAD(&dev->eqs_list);
+       ncomp_vec = table->num_comp_vectors;
+       nent = MLX5_COMP_EQ_SIZE;
+       for (i = 0; i < ncomp_vec; i++) {
+               eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+               if (!eq) {
+                       err = -ENOMEM;
+                       goto clean;
+               }
+
+               snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+               err = mlx5_create_map_eq(&dev->mdev, eq,
+                                        i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+                                        eq->name,
+                                        &dev->mdev.priv.uuari.uars[0]);
+               if (err) {
+                       /* eq is not on the list yet, so free it here */
+                       kfree(eq);
+                       goto clean;
+               }
+               mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+               eq->index = i;
+               spin_lock(&table->lock);
+               list_add_tail(&eq->list, &dev->eqs_list);
+               spin_unlock(&table->lock);
+       }
+
+       dev->num_comp_vectors = ncomp_vec;
+       return 0;
+
+clean:
+       /* The lock is dropped around each destroy call and retaken to
+        * continue the walk.
+        * NOTE(review): presumably because the destroy may sleep -- confirm.
+        */
+       spin_lock(&table->lock);
+       list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+               list_del(&eq->list);
+               spin_unlock(&table->lock);
+               if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+                       mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+               kfree(eq);
+               spin_lock(&table->lock);
+       }
+       spin_unlock(&table->lock);
+       return err;
+}
+
+/*
+ * Destroy and free every completion EQ on dev->eqs_list.  A failure to
+ * destroy an EQ is logged and the EQ structure is freed regardless.
+ */
+static void free_comp_eqs(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+       struct mlx5_eq *eq, *n;
+
+       spin_lock(&table->lock);
+       list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+               list_del(&eq->list);
+               /* The lock is dropped around the destroy call and retaken
+                * before the walk continues.
+                */
+               spin_unlock(&table->lock);
+               if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+                       mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+               kfree(eq);
+               spin_lock(&table->lock);
+       }
+       spin_unlock(&table->lock);
+}
+
+/*
+ * Fill *props with the device attributes.
+ *
+ * A NodeInfo MAD is issued through mlx5_MAD_IFC() on port 1; vendor id,
+ * part id, hardware version, system image GUID and the pkey count are
+ * decoded from its response.  The remaining limits come from
+ * dev->mdev.caps.  *props is zeroed only after the query succeeded.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * from mlx5_MAD_IFC().
+ */
+static int mlx5_ib_query_device(struct ib_device *ibdev,
+                               struct ib_device_attr *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct ib_smp *in_mad  = NULL;
+       struct ib_smp *out_mad = NULL;
+       int err = -ENOMEM;
+       int max_rq_sg;
+       int max_sq_sg;
+       u64 flags;
+
+       in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+       out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+       if (!in_mad || !out_mad)
+               goto out;
+
+       init_query_mad(in_mad);
+       in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+       err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+       if (err)
+               goto out;
+
+       memset(props, 0, sizeof(*props));
+
+       /* fw_ver layout: major in bits 32 and up, minor << 16, then subminor */
+       props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
+               (fw_rev_min(&dev->mdev) << 16) |
+               fw_rev_sub(&dev->mdev);
+       props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
+               IB_DEVICE_PORT_ACTIVE_EVENT             |
+               IB_DEVICE_SYS_IMAGE_GUID                |
+               IB_DEVICE_RC_RNR_NAK_GEN                |
+               IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+       /* Optional capabilities depend on the device capability flags */
+       flags = dev->mdev.caps.flags;
+       if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
+               props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+       if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
+               props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+       if (flags & MLX5_DEV_CAP_FLAG_APM)
+               props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+       props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+       if (flags & MLX5_DEV_CAP_FLAG_XRC)
+               props->device_cap_flags |= IB_DEVICE_XRC;
+       props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+       /* Fields decoded from the NodeInfo response payload */
+       props->vendor_id           = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
+               0xffffff;
+       props->vendor_part_id      = be16_to_cpup((__be16 *)(out_mad->data + 30));
+       props->hw_ver              = be32_to_cpup((__be32 *)(out_mad->data + 32));
+       memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
+
+       props->max_mr_size         = ~0ull;
+       props->page_size_cap       = dev->mdev.caps.min_page_sz;
+       props->max_qp              = 1 << dev->mdev.caps.log_max_qp;
+       props->max_qp_wr           = dev->mdev.caps.max_wqes;
+       /* max_sge is the smaller of what fits in an RQ and an SQ descriptor;
+        * the SQ descriptor also has to hold a control segment.
+        */
+       max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+       max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+               sizeof(struct mlx5_wqe_data_seg);
+       props->max_sge = min(max_rq_sg, max_sq_sg);
+       props->max_cq              = 1 << dev->mdev.caps.log_max_cq;
+       props->max_cqe             = dev->mdev.caps.max_cqes - 1;
+       props->max_mr              = 1 << dev->mdev.caps.log_max_mkey;
+       props->max_pd              = 1 << dev->mdev.caps.log_max_pd;
+       props->max_qp_rd_atom      = dev->mdev.caps.max_ra_req_qp;
+       props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
+       props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
+       props->max_srq             = 1 << dev->mdev.caps.log_max_srq;
+       props->max_srq_wr          = dev->mdev.caps.max_srq_wqes - 1;
+       props->max_srq_sge         = max_rq_sg - 1;
+       props->max_fast_reg_page_list_len = (unsigned int)-1;
+       props->local_ca_ack_delay  = dev->mdev.caps.local_ca_ack_delay;
+       props->atomic_cap          = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
+               IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+       props->masked_atomic_cap   = IB_ATOMIC_HCA;
+       props->max_pkeys           = be16_to_cpup((__be16 *)(out_mad->data + 28));
+       props->max_mcast_grp       = 1 << dev->mdev.caps.log_max_mcg;
+       props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
+       props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+                                          props->max_mcast_grp;
+       props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+
+       return err;
+}
+
+/*
+ * Fill *props with the attributes of the given port.
+ *
+ * A PortInfo MAD is issued through mlx5_MAD_IFC() and decoded into
+ * *props; max_msg_sz and pkey_tbl_len come from dev->mdev.caps.  When
+ * the port reports active speed 4 and the extended port info
+ * capability was recorded for it, a second (extended port info) query
+ * checks for FDR-10.
+ *
+ * Returns 0 on success, -EINVAL for a port outside
+ * 1..caps.num_ports, -ENOMEM on allocation failure, or the error from
+ * mlx5_MAD_IFC().  *props is zeroed before the first query is sent.
+ */
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+                      struct ib_port_attr *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct ib_smp *in_mad  = NULL;
+       struct ib_smp *out_mad = NULL;
+       int ext_active_speed;
+       int err = -ENOMEM;
+
+       if (port < 1 || port > dev->mdev.caps.num_ports) {
+               mlx5_ib_warn(dev, "invalid port number %d\n", port);
+               return -EINVAL;
+       }
+
+       in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+       out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+       if (!in_mad || !out_mad)
+               goto out;
+
+       memset(props, 0, sizeof(*props));
+
+       init_query_mad(in_mad);
+       in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+       in_mad->attr_mod = cpu_to_be32(port);
+
+       err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
+       if (err) {
+               mlx5_ib_warn(dev, "err %d\n", err);
+               goto out;
+       }
+
+
+       /* Decode the PortInfo response payload */
+       props->lid              = be16_to_cpup((__be16 *)(out_mad->data + 16));
+       props->lmc              = out_mad->data[34] & 0x7;
+       props->sm_lid           = be16_to_cpup((__be16 *)(out_mad->data + 18));
+       props->sm_sl            = out_mad->data[36] & 0xf;
+       props->state            = out_mad->data[32] & 0xf;
+       props->phys_state       = out_mad->data[33] >> 4;
+       props->port_cap_flags   = be32_to_cpup((__be32 *)(out_mad->data + 20));
+       props->gid_tbl_len      = out_mad->data[50];
+       props->max_msg_sz       = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
+       props->pkey_tbl_len     = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
+       props->bad_pkey_cntr    = be16_to_cpup((__be16 *)(out_mad->data + 46));
+       props->qkey_viol_cntr   = be16_to_cpup((__be16 *)(out_mad->data + 48));
+       props->active_width     = out_mad->data[31] & 0xf;
+       props->active_speed     = out_mad->data[35] >> 4;
+       props->max_mtu          = out_mad->data[41] & 0xf;
+       props->active_mtu       = out_mad->data[36] >> 4;
+       props->subnet_timeout   = out_mad->data[51] & 0x1f;
+       props->max_vl_num       = out_mad->data[37] >> 4;
+       props->init_type_reply  = out_mad->data[41] >> 4;
+
+       /* Check if extended speeds (EDR/FDR/...) are supported */
+       if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+               ext_active_speed = out_mad->data[62] >> 4;
+
+               /* Any other value leaves the speed decoded above in place */
+               switch (ext_active_speed) {
+               case 1:
+                       props->active_speed = 16; /* FDR */
+                       break;
+               case 2:
+                       props->active_speed = 32; /* EDR */
+                       break;
+               }
+       }
+
+       /* If reported active speed is QDR, check if it is FDR-10 */
+       if (props->active_speed == 4) {
+               if (dev->mdev.caps.ext_port_cap[port - 1] &
+                   MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
+                       /* in_mad and out_mad are reused for the second query */
+                       init_query_mad(in_mad);
+                       in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+                       in_mad->attr_mod = cpu_to_be32(port);
+
+                       err = mlx5_MAD_IFC(dev, 1, 1, port,
+                                          NULL, NULL, in_mad, out_mad);
+                       if (err)
+                               goto out;
+
+                       /* Checking LinkSpeedActive for FDR-10 */
+                       if (out_mad->data[15] & 0x1)
+                               props->active_speed = 8;
+               }
+       }
+
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+
+       return err;
+}
+
+/*
+ * Read GID number index of the given port into *gid.
+ *
+ * The first 8 bytes of the GID are taken from offset 8 of a PortInfo
+ * response; the last 8 bytes come from a GuidInfo response for block
+ * index / 8, entry index % 8.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * from mlx5_MAD_IFC().  *gid may be partially written on failure.
+ */
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+                            union ib_gid *gid)
+{
+       struct ib_smp *req = kzalloc(sizeof(*req), GFP_KERNEL);
+       struct ib_smp *rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       int err;
+
+       if (!req || !rsp) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* First half of the GID: PortInfo */
+       init_query_mad(req);
+       req->attr_id  = IB_SMP_ATTR_PORT_INFO;
+       req->attr_mod = cpu_to_be32(port);
+
+       err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, req, rsp);
+       if (err)
+               goto out;
+
+       memcpy(gid->raw, rsp->data + 8, 8);
+
+       /* Second half of the GID: GuidInfo, 8 GUIDs per block */
+       init_query_mad(req);
+       req->attr_id  = IB_SMP_ATTR_GUID_INFO;
+       req->attr_mod = cpu_to_be32(index / 8);
+
+       err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, req, rsp);
+       if (!err)
+               memcpy(gid->raw + 8, rsp->data + (index % 8) * 8, 8);
+
+out:
+       kfree(req);
+       kfree(rsp);
+       return err;
+}
+
+/*
+ * mlx5_ib_query_pkey - read one P_Key table entry for a port.
+ * @ibdev: IB device being queried
+ * @port:  port number handed to the firmware MAD interface
+ * @index: P_Key table index; index / 32 is used as the attribute modifier
+ *         and index % 32 selects the 16-bit entry in the reply
+ * @pkey:  output, converted from big endian to CPU byte order
+ *
+ * Returns 0 on success, -ENOMEM if a MAD buffer cannot be allocated, or
+ * the error returned by mlx5_MAD_IFC().  @pkey is untouched on failure.
+ */
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+                             u16 *pkey)
+{
+       struct ib_smp *req;
+       struct ib_smp *rsp;
+       __be16 *table;
+       int ret;
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+       if (!req || !rsp) {
+               ret = -ENOMEM;
+               goto free_mads;
+       }
+
+       init_query_mad(req);
+       req->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
+       req->attr_mod = cpu_to_be32(index / 32);
+
+       ret = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, req, rsp);
+       if (ret)
+               goto free_mads;
+
+       /* The reply payload is read as an array of big-endian 16-bit keys. */
+       table = (__be16 *)rsp->data;
+       *pkey = be16_to_cpu(table[index % 32]);
+
+free_mads:
+       /* kfree() accepts NULL, so a partial allocation is handled too. */
+       kfree(req);
+       kfree(rsp);
+       return ret;
+}
+
+struct mlx5_reg_node_desc {    /* in/out buffer passed to mlx5_core_access_reg() for MLX5_REG_NODE_DESC */
+       u8      desc[64];       /* node description bytes; mlx5_ib_modify_device() copies 64 bytes in */
+};
+
+/*
+ * mlx5_ib_modify_device - change modifiable device attributes.
+ * @ibdev: device to modify
+ * @mask:  attribute mask; only IB_DEVICE_MODIFY_NODE_DESC is accepted
+ * @props: new values; only props->node_desc is read
+ *
+ * The new node description is first written to the device through the
+ * MLX5_REG_NODE_DESC register.  Only when that write succeeds is the
+ * cached copy in @ibdev->node_desc updated; a failed register access is
+ * reported to the caller and leaves the cached description untouched.
+ *
+ * Returns 0 on success or when @mask is empty, -EOPNOTSUPP when @mask has
+ * any other bit set, or the error from mlx5_core_access_reg().
+ */
+static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
+                                struct ib_device_modify *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_reg_node_desc desc_in;
+       struct mlx5_reg_node_desc desc_out;
+       int ret;
+
+       if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+               return -EOPNOTSUPP;
+
+       /* Nothing requested: succeed without touching the device. */
+       if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
+               return 0;
+
+       memcpy(&desc_in, props->node_desc, 64);
+       ret = mlx5_core_access_reg(&dev->mdev, &desc_in, sizeof(desc_in),
+                                  &desc_out, sizeof(desc_out),
+                                  MLX5_REG_NODE_DESC, 0, 1);
+       if (ret)
+               return ret;
+
+       memcpy(ibdev->node_desc, props->node_desc, 64);
+
+       return 0;
+}
+
+/*
+ * mlx5_ib_modify_port - set and clear port capability bits.
+ * @ibdev: device owning the port
+ * @port:  port number
+ * @mask:  not examined by this implementation
+ * @props: props->set_port_cap_mask is OR-ed into, and
+ *         props->clr_port_cap_mask cleared from, the current capabilities
+ *
+ * The read-modify-write of the capability mask runs under
+ * dev->cap_mask_mutex.  Returns 0 on success or the error from
+ * mlx5_ib_query_port() / mlx5_set_port_caps().
+ */
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+                              struct ib_port_modify *props)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct ib_port_attr cur;
+       u32 new_caps;
+       int ret;
+
+       mutex_lock(&dev->cap_mask_mutex);
+
+       ret = mlx5_ib_query_port(ibdev, port, &cur);
+       if (!ret) {
+               new_caps = (cur.port_cap_flags | props->set_port_cap_mask) &
+                          ~props->clr_port_cap_mask;
+               ret = mlx5_set_port_caps(&dev->mdev, port, new_caps);
+       }
+
+       mutex_unlock(&dev->cap_mask_mutex);
+       return ret;
+}
+
+/*
+ * mlx5_ib_alloc_ucontext - create a user context and the UARs backing it.
+ * @ibdev: device the context is created on
+ * @udata: carries struct mlx5_ib_alloc_ucontext_req from user space and
+ *         receives struct mlx5_ib_alloc_ucontext_resp
+ *
+ * The requested number of uuars is rounded up to a multiple of
+ * MLX5_BF_REGS_PER_PAGE, one UAR is allocated per MLX5_BF_REGS_PER_PAGE
+ * uuars, and device limits are reported back in the response.
+ *
+ * Returns the new context, or an ERR_PTR():
+ *   -EAGAIN  device is not active
+ *   -ENOMEM  more than MLX5_MAX_UUARS requested, or allocation failure
+ *   -EINVAL  zero uuars requested, or num_low_latency_uuars does not leave
+ *            at least one other uuar
+ *   other    error from ib_copy_from_udata(), mlx5_cmd_alloc_uar() or
+ *            ib_copy_to_udata()
+ * On any failure every UAR and buffer allocated so far is released.
+ */
+static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+                                                 struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_alloc_ucontext_req req;
+       struct mlx5_ib_alloc_ucontext_resp resp;
+       struct mlx5_ib_ucontext *context;
+       struct mlx5_uuar_info *uuari;
+       struct mlx5_uar *uars;
+       int num_uars;
+       int uuarn;
+       int err;
+       int i;
+
+       if (!dev->ib_active)
+               return ERR_PTR(-EAGAIN);
+
+       err = ib_copy_from_udata(&req, udata, sizeof(req));
+       if (err)
+               return ERR_PTR(err);
+
+       if (req.total_num_uuars > MLX5_MAX_UUARS)
+               return ERR_PTR(-ENOMEM);
+
+       if (req.total_num_uuars == 0)
+               return ERR_PTR(-EINVAL);
+
+       req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
+       if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * The whole structure is copied to user space below; zero it first
+        * so bytes that are not explicitly assigned here never carry kernel
+        * stack contents.
+        */
+       memset(&resp, 0, sizeof(resp));
+
+       num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
+       resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;
+       resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;
+       resp.cache_line_size  = L1_CACHE_BYTES;
+       resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
+       resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
+       resp.max_send_wqebb = dev->mdev.caps.max_wqes;
+       resp.max_recv_wr = dev->mdev.caps.max_wqes;
+       resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
+
+       context = kzalloc(sizeof(*context), GFP_KERNEL);
+       if (!context)
+               return ERR_PTR(-ENOMEM);
+
+       uuari = &context->uuari;
+       mutex_init(&uuari->lock);
+       uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
+       if (!uars) {
+               err = -ENOMEM;
+               goto out_ctx;
+       }
+
+       uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
+                               sizeof(*uuari->bitmap),
+                               GFP_KERNEL);
+       if (!uuari->bitmap) {
+               err = -ENOMEM;
+               goto out_uar_ctx;
+       }
+       /*
+        * Within every group of four uuars, mark registers 2 and 3 as set
+        * in the bitmap; registers 0 and 1 start out clear.
+        * NOTE(review): presumably a set bit means "not available to the
+        * allocator" -- confirm against the uuar allocation code.
+        */
+       for (i = 0; i < req.total_num_uuars; i++) {
+               uuarn = i & 3;
+               if (uuarn == 2 || uuarn == 3)
+                       set_bit(i, uuari->bitmap);
+       }
+
+       uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
+       if (!uuari->count) {
+               err = -ENOMEM;
+               goto out_bitmap;
+       }
+
+       for (i = 0; i < num_uars; i++) {
+               err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
+               if (err)
+                       goto out_uars;  /* releases UARs 0..i-1 */
+       }
+
+       INIT_LIST_HEAD(&context->db_page_list);
+       mutex_init(&context->db_page_mutex);
+
+       resp.tot_uuars = req.total_num_uuars;
+       resp.num_ports = dev->mdev.caps.num_ports;
+       err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+       if (err)
+               goto out_uars;
+
+       uuari->num_low_latency_uuars = req.num_low_latency_uuars;
+       uuari->uars = uars;
+       uuari->num_uars = num_uars;
+       return &context->ibucontext;
+
+out_uars:
+       /* i is one past the last UAR that was successfully allocated. */
+       for (i--; i >= 0; i--)
+               mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
+       kfree(uuari->count);
+
+out_bitmap:
+       kfree(uuari->bitmap);
+
+out_uar_ctx:
+       kfree(uars);
+
+out_ctx:
+       kfree(context);
+       return ERR_PTR(err);
+}
+
+/*
+ * mlx5_ib_dealloc_ucontext - release a user context.
+ * @ibcontext: context created by mlx5_ib_alloc_ucontext()
+ *
+ * Frees every UAR recorded in the context (a failure is only logged),
+ * then the uuar bookkeeping arrays and the context itself.
+ * Always returns 0.
+ */
+static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+       struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+       struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+       struct mlx5_uuar_info *uuari = &context->uuari;
+       struct mlx5_uar *uar;
+       int n;
+
+       for (n = 0; n < uuari->num_uars; n++) {
+               uar = &uuari->uars[n];
+               if (!mlx5_cmd_free_uar(&dev->mdev, uar->index))
+                       continue;
+               mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uar->index);
+       }
+
+       kfree(uuari->count);
+       kfree(uuari->bitmap);
+       kfree(uuari->uars);
+       kfree(context);
+
+       return 0;
+}
+
+/*
+ * Translate a UAR index into a page frame number: the frame number of the
+ * start of PCI resource 0 plus @index.
+ */
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+{
+       phys_addr_t first_pfn;
+
+       first_pfn = pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT;
+       return first_pfn + index;
+}
+
+/*
+ * Extract the mmap command from a page offset: the bits selected by
+ * MLX5_IB_MMAP_CMD_MASK after shifting right by MLX5_IB_MMAP_CMD_SHIFT.
+ */
+static int get_command(unsigned long offset)
+{
+       unsigned long shifted = offset >> MLX5_IB_MMAP_CMD_SHIFT;
+
+       return shifted & MLX5_IB_MMAP_CMD_MASK;
+}
+
+/*
+ * Extract the mmap argument from a page offset: the low
+ * MLX5_IB_MMAP_CMD_SHIFT bits, i.e. everything below the command field.
+ */
+static int get_arg(unsigned long offset)
+{
+       const int low_bits = (1 << MLX5_IB_MMAP_CMD_SHIFT) - 1;
+
+       return offset & low_bits;
+}
+
+/* The UAR index of an mmap offset is simply its argument field. */
+static int get_index(unsigned long offset)
+{
+       int idx = get_arg(offset);
+
+       return idx;
+}
+
+/*
+ * mlx5_ib_mmap - map a UAR page of a user context into user space.
+ * @ibcontext: context whose UARs may be mapped
+ * @vma:       target VMA; vm_pgoff encodes a command (see get_command())
+ *             and a UAR index (see get_index())
+ *
+ * MLX5_IB_MMAP_REGULAR_PAGE maps exactly one page, write-combining, onto
+ * the UAR selected by the index.  The index comes from user space, so it
+ * is validated against the number of UARs owned by the context before it
+ * is used to look anything up.
+ *
+ * Returns 0 on success, -EINVAL for a bad length, index or command,
+ * -EAGAIN if the remap fails, and -ENOSYS for
+ * MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES.
+ */
+static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+       struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+       struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+       struct mlx5_uuar_info *uuari = &context->uuari;
+       unsigned long command;
+       unsigned long idx;
+       phys_addr_t pfn;
+
+       command = get_command(vma->vm_pgoff);
+       switch (command) {
+       case MLX5_IB_MMAP_REGULAR_PAGE:
+               if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+                       return -EINVAL;
+
+               /* Reject out-of-range indices before touching uars[]. */
+               idx = get_index(vma->vm_pgoff);
+               if (idx >= uuari->num_uars)
+                       return -EINVAL;
+
+               pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+               mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
+                           (unsigned long long)pfn);
+
+               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+               if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+                                      PAGE_SIZE, vma->vm_page_prot))
+                       return -EAGAIN;
+
+               mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
+                           vma->vm_start,
+                           (unsigned long long)pfn << PAGE_SHIFT);
+               break;
+
+       case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
+               return -ENOSYS;
+
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * alloc_pa_mkey - create an mkey for protection domain @pdn.
+ * @dev: device to create the key on
+ * @key: output, receives the key of the new mkey on success
+ * @pdn: protection domain number placed in the mkey segment
+ *
+ * The segment is set up with MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA,
+ * MLX5_MKEY_LEN64 and a start address of 0.
+ *
+ * Returns 0 on success, -ENOMEM if the command mailbox cannot be
+ * allocated, or the error from mlx5_core_create_mkey() (also logged).
+ */
+static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
+{
+       struct mlx5_create_mkey_mbox_in *mbox;
+       struct mlx5_core_mr mr;
+       int ret;
+
+       mbox = kzalloc(sizeof(*mbox), GFP_KERNEL);
+       if (!mbox)
+               return -ENOMEM;
+
+       mbox->seg.flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
+       mbox->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
+       mbox->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       mbox->seg.start_addr = 0;
+
+       ret = mlx5_core_create_mkey(&dev->mdev, &mr, mbox, sizeof(*mbox));
+       if (ret)
+               mlx5_ib_warn(dev, "failed to create mkey, %d\n", ret);
+
+       /* The mailbox is only needed for the duration of the command. */
+       kfree(mbox);
+       if (ret)
+               return ret;
+
+       *key = mr.key;
+       return 0;
+}
+
+/*
+ * free_pa_mkey - destroy the mkey identified by @key.
+ *
+ * A zeroed struct mlx5_core_mr carrying only the key is handed to
+ * mlx5_core_destroy_mkey(); a failure is logged and otherwise ignored.
+ */
+static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
+{
+       struct mlx5_core_mr mr;
+
+       memset(&mr, 0, sizeof(mr));
+       mr.key = key;
+
+       if (mlx5_core_destroy_mkey(&dev->mdev, &mr))
+               mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
+}
+
+/*
+ * mlx5_ib_alloc_pd - allocate a protection domain.
+ * @ibdev:   device to allocate on
+ * @context: user context, or NULL for a kernel PD
+ * @udata:   for user PDs, receives struct mlx5_ib_alloc_pd_resp (the pdn)
+ *
+ * User PDs report their pdn to user space; kernel PDs additionally get an
+ * mkey from alloc_pa_mkey(), stored in pd->pa_lkey.
+ *
+ * Returns the new PD or an ERR_PTR(): -ENOMEM on allocation failure,
+ * -EFAULT if the response cannot be copied to user space, or the error
+ * from mlx5_core_alloc_pd() / alloc_pa_mkey().
+ */
+static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
+                                     struct ib_ucontext *context,
+                                     struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_alloc_pd_resp resp;
+       struct mlx5_ib_pd *pd;
+       int ret;
+
+       pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+       if (!pd)
+               return ERR_PTR(-ENOMEM);
+
+       ret = mlx5_core_alloc_pd(&dev->mdev, &pd->pdn);
+       if (ret)
+               goto free_pd;
+
+       if (context) {
+               resp.pdn = pd->pdn;
+               if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+                       ret = -EFAULT;
+                       goto dealloc_pdn;
+               }
+       } else {
+               ret = alloc_pa_mkey(dev, &pd->pa_lkey, pd->pdn);
+               if (ret)
+                       goto dealloc_pdn;
+       }
+
+       return &pd->ibpd;
+
+dealloc_pdn:
+       mlx5_core_dealloc_pd(&dev->mdev, pd->pdn);
+
+free_pd:
+       kfree(pd);
+       return ERR_PTR(ret);
+}
+
+/*
+ * mlx5_ib_dealloc_pd - release a protection domain.
+ *
+ * PDs without a user object own an mkey (pa_lkey), which is destroyed
+ * first; then the pdn is returned to the device and the memory freed.
+ * Always returns 0.
+ */
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_pd *mlx_pd = to_mpd(pd);
+       bool kernel_pd = !pd->uobject;
+
+       if (kernel_pd)
+               free_pa_mkey(dev, mlx_pd->pa_lkey);
+
+       mlx5_core_dealloc_pd(&dev->mdev, mlx_pd->pdn);
+       kfree(mlx_pd);
+
+       return 0;
+}
+
+/*
+ * mlx5_ib_mcg_attach - attach a QP to the multicast group @gid.
+ * @lid is not used.  Returns the result of mlx5_core_attach_mcg(); a
+ * failure is also logged.
+ */
+static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       int ret = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
+
+       if (!ret)
+               return 0;
+
+       mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
+                    ibqp->qp_num, gid->raw);
+       return ret;
+}
+
+/*
+ * mlx5_ib_mcg_detach - detach a QP from the multicast group @gid.
+ * @lid is not used.  Returns the result of mlx5_core_detach_mcg(); a
+ * failure is also logged.
+ */
+static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       int ret = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
+
+       if (!ret)
+               return 0;
+
+       mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
+                    ibqp->qp_num, gid->raw);
+       return ret;
+}
+
+/*
+ * init_node_data - fetch node description, revision and node GUID.
+ *
+ * Issues two SMP queries on port 1: NodeDescription fills
+ * dev->ib_dev.node_desc (64 bytes), NodeInfo supplies dev->mdev.rev_id
+ * (big-endian 32-bit value at byte 32 of the reply) and
+ * dev->ib_dev.node_guid (8 bytes at byte 12 of the reply).
+ *
+ * Returns 0 on success, -ENOMEM if a MAD buffer cannot be allocated, or
+ * the error returned by mlx5_MAD_IFC().
+ */
+static int init_node_data(struct mlx5_ib_dev *dev)
+{
+       struct ib_smp *req;
+       struct ib_smp *rsp;
+       int ret;
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+       if (!req || !rsp) {
+               ret = -ENOMEM;
+               goto free_mads;
+       }
+
+       init_query_mad(req);
+       req->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+       ret = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, req, rsp);
+       if (ret)
+               goto free_mads;
+
+       memcpy(dev->ib_dev.node_desc, rsp->data, 64);
+
+       /* Same request header, only the attribute changes. */
+       req->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+       ret = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, req, rsp);
+       if (ret)
+               goto free_mads;
+
+       dev->mdev.rev_id = be32_to_cpup((__be32 *)(rsp->data + 32));
+       memcpy(&dev->ib_dev.node_guid, rsp->data + 12, 8);
+
+free_mads:
+       /* kfree() accepts NULL, so a partial allocation is handled too. */
+       kfree(req);
+       kfree(rsp);
+       return ret;
+}
+
+/* sysfs show handler: prints dev->mdev.priv.fw_pages as a decimal line. */
+static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
+                            char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       return sprintf(buf, "%d\n", ibdev->mdev.priv.fw_pages);
+}
+
+/* sysfs show handler: prints dev->mdev.priv.reg_pages as a decimal line. */
+static ssize_t show_reg_pages(struct device *device,
+                             struct device_attribute *attr, char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       return sprintf(buf, "%d\n", ibdev->mdev.priv.reg_pages);
+}
+
+/* sysfs show handler: prints "MT" followed by the PCI device ID in decimal. */
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       return sprintf(buf, "MT%d\n", ibdev->mdev.pdev->device);
+}
+
+/* sysfs show handler: prints the firmware revision as "major.minor.sub". */
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+                          char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+       struct mlx5_core_dev *mdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       mdev = &ibdev->mdev;
+       return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(mdev),
+                      fw_rev_min(mdev), fw_rev_sub(mdev));
+}
+
+/* sysfs show handler: prints dev->mdev.rev_id in hexadecimal. */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       return sprintf(buf, "%x\n", ibdev->mdev.rev_id);
+}
+
+/*
+ * sysfs show handler: prints the board ID, limited to at most
+ * MLX5_BOARD_ID_LEN characters by the "%.*s" precision.
+ */
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+                         char *buf)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+       return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+                      ibdev->mdev.board_id);
+}
+
+static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL); /* no store handler; show_rev() prints rev_id */
+static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL); /* no store handler; show_fw_ver() prints maj.min.sub */
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL); /* no store handler; show_hca() prints "MT<device id>" */
+static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL); /* no store handler; show_board() prints board_id */
+static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); /* no store handler; show_fw_pages() prints priv.fw_pages */
+static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); /* no store handler; show_reg_pages() prints priv.reg_pages */
+
+static struct device_attribute *mlx5_class_attributes[] = { /* init_one() calls device_create_file() on every entry */
+       &dev_attr_hw_rev,
+       &dev_attr_fw_ver,
+       &dev_attr_hca_type,
+       &dev_attr_board_id,
+       &dev_attr_fw_pages,
+       &dev_attr_reg_pages,
+};
+
+/*
+ * mlx5_ib_event - translate a core device event into an IB event.
+ * @dev:   core device embedded in the mlx5_ib_dev that raised the event
+ * @event: core event code
+ * @data:  for port-related events, points to a u8 holding the port number
+ *
+ * The translated event is dispatched only while the IB device is active.
+ * A system error marks the device inactive, but only after
+ * IB_EVENT_DEVICE_FATAL has been dispatched, so that consumers are
+ * actually told about the failure.  MLX5_DEV_EVENT_PORT_INITIALIZED and
+ * unrecognised event codes are dropped without dispatching anything.
+ */
+static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+                         void *data)
+{
+       struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
+       struct ib_event ibev;
+       bool fatal = false;
+       u8 port = 0;
+
+       switch (event) {
+       case MLX5_DEV_EVENT_SYS_ERROR:
+               ibev.event = IB_EVENT_DEVICE_FATAL;
+               /* ib_active is cleared below, after the dispatch. */
+               fatal = true;
+               break;
+
+       case MLX5_DEV_EVENT_PORT_UP:
+               ibev.event = IB_EVENT_PORT_ACTIVE;
+               port = *(u8 *)data;
+               break;
+
+       case MLX5_DEV_EVENT_PORT_DOWN:
+               ibev.event = IB_EVENT_PORT_ERR;
+               port = *(u8 *)data;
+               break;
+
+       case MLX5_DEV_EVENT_PORT_INITIALIZED:
+               /* not used by ULPs */
+               return;
+
+       case MLX5_DEV_EVENT_LID_CHANGE:
+               ibev.event = IB_EVENT_LID_CHANGE;
+               port = *(u8 *)data;
+               break;
+
+       case MLX5_DEV_EVENT_PKEY_CHANGE:
+               ibev.event = IB_EVENT_PKEY_CHANGE;
+               port = *(u8 *)data;
+               break;
+
+       case MLX5_DEV_EVENT_GUID_CHANGE:
+               ibev.event = IB_EVENT_GID_CHANGE;
+               port = *(u8 *)data;
+               break;
+
+       case MLX5_DEV_EVENT_CLIENT_REREG:
+               ibev.event = IB_EVENT_CLIENT_REREGISTER;
+               port = *(u8 *)data;
+               break;
+
+       default:
+               /* Never dispatch an event whose type was not filled in. */
+               return;
+       }
+
+       ibev.device           = &ibdev->ib_dev;
+       ibev.element.port_num = port;
+
+       if (ibdev->ib_active)
+               ib_dispatch_event(&ibev);
+
+       if (fatal)
+               ibdev->ib_active = false;
+}
+
+/*
+ * Query the extended port capabilities of every port, 1 through
+ * caps.num_ports.  The result of each query is not checked.
+ */
+static void get_ext_port_caps(struct mlx5_ib_dev *dev)
+{
+       int p = 1;
+
+       while (p <= dev->mdev.caps.num_ports) {
+               mlx5_query_ext_port_caps(dev, p);
+               p++;
+       }
+}
+
+/*
+ * get_port_caps - cache per-port P_Key and GID table lengths.
+ *
+ * Queries the device once and every port in turn, storing
+ * dprops->max_pkeys and pprops->gid_tbl_len in dev->mdev.caps.port[].
+ *
+ * Returns 0 on success, -ENOMEM if a scratch buffer cannot be allocated,
+ * or the error from mlx5_ib_query_device() / mlx5_ib_query_port().  When
+ * a port query fails, ports after it are left untouched.
+ */
+static int get_port_caps(struct mlx5_ib_dev *dev)
+{
+       struct ib_device_attr *dprops = NULL;
+       struct ib_port_attr *pprops = NULL;
+       /* Covers both allocation failures below; overwritten afterwards. */
+       int err = -ENOMEM;
+       int port;
+
+       pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+       if (!pprops)
+               goto out;
+
+       dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
+       if (!dprops)
+               goto out;
+
+       err = mlx5_ib_query_device(&dev->ib_dev, dprops);
+       if (err) {
+               mlx5_ib_warn(dev, "query_device failed %d\n", err);
+               goto out;
+       }
+
+       for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
+               err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
+               if (err) {
+                       mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
+                       break;
+               }
+               dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
+               dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+               mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
+                           dprops->max_pkeys, pprops->gid_tbl_len);
+       }
+
+out:
+       /* kfree() accepts NULL, so a partial allocation is handled too. */
+       kfree(pprops);
+       kfree(dprops);
+
+       return err;
+}
+
+/*
+ * destroy_umrc_res - undo create_umr_res().
+ *
+ * Cleans up the MR cache (a failure is only logged), then destroys the
+ * UMR QP, CQ, MR and PD, in that order.
+ */
+static void destroy_umrc_res(struct mlx5_ib_dev *dev)
+{
+       if (mlx5_mr_cache_cleanup(dev))
+               mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+       mlx5_ib_destroy_qp(dev->umrc.qp);
+       ib_destroy_cq(dev->umrc.cq);
+       ib_dereg_mr(dev->umrc.mr);
+       ib_dealloc_pd(dev->umrc.pd);
+}
+
+enum {
+       MAX_UMR_WR = 128,       /* max_send_wr of the UMR QP and initial count of umrc.sem (see create_umr_res()) */
+};
+
+/*
+ * create_umr_res - set up the resources used for UMR work requests.
+ *
+ * Creates, in order, a PD, a DMA MR, a CQ (handled by
+ * mlx5_umr_cq_handler and armed once) and a QP of type
+ * MLX5_IB_QPT_REG_UMR, walks the QP through INIT, RTR and RTS, publishes
+ * everything in dev->umrc, initialises umrc.sem to MAX_UMR_WR and finally
+ * initialises the MR cache.
+ *
+ * Returns 0 on success.  On failure the objects created so far are
+ * destroyed in reverse order and the error is returned (-ENOMEM for the
+ * attribute buffers, otherwise the error of the failing call).
+ */
+static int create_umr_res(struct mlx5_ib_dev *dev)
+{
+       struct ib_qp_init_attr *qp_init;
+       struct ib_qp_attr *qp_attr;
+       struct ib_pd *pd;
+       struct ib_cq *cq;
+       struct ib_qp *qp;
+       struct ib_mr *mr;
+       int ret;
+
+       qp_attr = kzalloc(sizeof(*qp_attr), GFP_KERNEL);
+       qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
+       if (!qp_attr || !qp_init) {
+               ret = -ENOMEM;
+               goto free_attrs;
+       }
+
+       pd = ib_alloc_pd(&dev->ib_dev);
+       if (IS_ERR(pd)) {
+               mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+               ret = PTR_ERR(pd);
+               goto free_attrs;
+       }
+
+       mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(mr)) {
+               mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
+               ret = PTR_ERR(mr);
+               goto dealloc_pd;
+       }
+
+       cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
+                         0);
+       if (IS_ERR(cq)) {
+               mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+               ret = PTR_ERR(cq);
+               goto dereg_mr;
+       }
+       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+       /* One CQ serves both directions; every send WR is signalled. */
+       qp_init->send_cq = cq;
+       qp_init->recv_cq = cq;
+       qp_init->sq_sig_type = IB_SIGNAL_ALL_WR;
+       qp_init->cap.max_send_wr = MAX_UMR_WR;
+       qp_init->cap.max_send_sge = 1;
+       qp_init->qp_type = MLX5_IB_QPT_REG_UMR;
+       qp_init->port_num = 1;
+       qp = mlx5_ib_create_qp(pd, qp_init, NULL);
+       if (IS_ERR(qp)) {
+               mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+               ret = PTR_ERR(qp);
+               goto destroy_cq;
+       }
+       /*
+        * The QP was created through the driver entry point directly, so
+        * the generic ib_qp fields are filled in here.
+        */
+       qp->device     = &dev->ib_dev;
+       qp->real_qp    = qp;
+       qp->uobject    = NULL;
+       qp->qp_type    = MLX5_IB_QPT_REG_UMR;
+
+       /* RESET -> INIT */
+       qp_attr->qp_state = IB_QPS_INIT;
+       qp_attr->port_num = 1;
+       ret = mlx5_ib_modify_qp(qp, qp_attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+                               IB_QP_PORT, NULL);
+       if (ret) {
+               mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+               goto destroy_qp;
+       }
+
+       /* INIT -> RTR */
+       memset(qp_attr, 0, sizeof(*qp_attr));
+       qp_attr->qp_state = IB_QPS_RTR;
+       qp_attr->path_mtu = IB_MTU_256;
+
+       ret = mlx5_ib_modify_qp(qp, qp_attr, IB_QP_STATE, NULL);
+       if (ret) {
+               mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+               goto destroy_qp;
+       }
+
+       /* RTR -> RTS */
+       memset(qp_attr, 0, sizeof(*qp_attr));
+       qp_attr->qp_state = IB_QPS_RTS;
+       ret = mlx5_ib_modify_qp(qp, qp_attr, IB_QP_STATE, NULL);
+       if (ret) {
+               mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+               goto destroy_qp;
+       }
+
+       dev->umrc.qp = qp;
+       dev->umrc.cq = cq;
+       dev->umrc.mr = mr;
+       dev->umrc.pd = pd;
+
+       sema_init(&dev->umrc.sem, MAX_UMR_WR);
+       ret = mlx5_mr_cache_init(dev);
+       if (ret) {
+               mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+               goto destroy_qp;
+       }
+
+       kfree(qp_attr);
+       kfree(qp_init);
+
+       return 0;
+
+destroy_qp:
+       mlx5_ib_destroy_qp(qp);
+
+destroy_cq:
+       ib_destroy_cq(cq);
+
+dereg_mr:
+       ib_dereg_mr(mr);
+
+dealloc_pd:
+       ib_dealloc_pd(pd);
+
+free_attrs:
+       /* kfree() accepts NULL, so a partial allocation is handled too. */
+       kfree(qp_attr);
+       kfree(qp_init);
+       return ret;
+}
+
+/*
+ * create_dev_resources - create the driver-internal PD, CQ, XRCDs and SRQ.
+ * @devr: resource holder embedded in struct mlx5_ib_dev
+ *
+ * The objects are created through the driver entry points directly, so
+ * the generic fields (device, uobject, handlers, use counts, ...) are
+ * initialised by hand after each creation.  s0 is an XRC SRQ on PD p0
+ * that uses CQ c0 and XRCD x0; the use counts of those three objects are
+ * raised accordingly.
+ *
+ * Returns 0 on success.  On failure everything created so far is released
+ * in reverse order and the error of the failing call is returned.
+ */
+static int create_dev_resources(struct mlx5_ib_resources *devr)
+{
+       struct mlx5_ib_dev *dev = container_of(devr, struct mlx5_ib_dev, devr);
+       struct ib_device *ibdev = &dev->ib_dev;
+       struct ib_srq_init_attr srq_attr;
+       int ret = 0;
+
+       devr->p0 = mlx5_ib_alloc_pd(ibdev, NULL, NULL);
+       if (IS_ERR(devr->p0)) {
+               ret = PTR_ERR(devr->p0);
+               goto fail;
+       }
+       devr->p0->device  = ibdev;
+       devr->p0->uobject = NULL;
+       atomic_set(&devr->p0->usecnt, 0);
+
+       devr->c0 = mlx5_ib_create_cq(ibdev, 1, 0, NULL, NULL);
+       if (IS_ERR(devr->c0)) {
+               ret = PTR_ERR(devr->c0);
+               goto free_p0;
+       }
+       devr->c0->device        = ibdev;
+       devr->c0->uobject       = NULL;
+       devr->c0->comp_handler  = NULL;
+       devr->c0->event_handler = NULL;
+       devr->c0->cq_context    = NULL;
+       atomic_set(&devr->c0->usecnt, 0);
+
+       devr->x0 = mlx5_ib_alloc_xrcd(ibdev, NULL, NULL);
+       if (IS_ERR(devr->x0)) {
+               ret = PTR_ERR(devr->x0);
+               goto free_c0;
+       }
+       devr->x0->device = ibdev;
+       devr->x0->inode = NULL;
+       atomic_set(&devr->x0->usecnt, 0);
+       mutex_init(&devr->x0->tgt_qp_mutex);
+       INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
+
+       devr->x1 = mlx5_ib_alloc_xrcd(ibdev, NULL, NULL);
+       if (IS_ERR(devr->x1)) {
+               ret = PTR_ERR(devr->x1);
+               goto free_x0;
+       }
+       devr->x1->device = ibdev;
+       devr->x1->inode = NULL;
+       atomic_set(&devr->x1->usecnt, 0);
+       mutex_init(&devr->x1->tgt_qp_mutex);
+       INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
+
+       memset(&srq_attr, 0, sizeof(srq_attr));
+       srq_attr.attr.max_sge = 1;
+       srq_attr.attr.max_wr = 1;
+       srq_attr.srq_type = IB_SRQT_XRC;
+       srq_attr.ext.xrc.cq = devr->c0;
+       srq_attr.ext.xrc.xrcd = devr->x0;
+
+       devr->s0 = mlx5_ib_create_srq(devr->p0, &srq_attr, NULL);
+       if (IS_ERR(devr->s0)) {
+               ret = PTR_ERR(devr->s0);
+               goto free_x1;
+       }
+       devr->s0->device        = ibdev;
+       devr->s0->pd            = devr->p0;
+       devr->s0->uobject       = NULL;
+       devr->s0->event_handler = NULL;
+       devr->s0->srq_context   = NULL;
+       devr->s0->srq_type      = IB_SRQT_XRC;
+       devr->s0->ext.xrc.xrcd  = devr->x0;
+       devr->s0->ext.xrc.cq    = devr->c0;
+       /* s0 now holds a reference on its XRCD, its CQ and its PD. */
+       atomic_inc(&devr->x0->usecnt);
+       atomic_inc(&devr->c0->usecnt);
+       atomic_inc(&devr->p0->usecnt);
+       atomic_set(&devr->s0->usecnt, 0);
+
+       return 0;
+
+free_x1:
+       mlx5_ib_dealloc_xrcd(devr->x1);
+free_x0:
+       mlx5_ib_dealloc_xrcd(devr->x0);
+free_c0:
+       mlx5_ib_destroy_cq(devr->c0);
+free_p0:
+       mlx5_ib_dealloc_pd(devr->p0);
+fail:
+       return ret;
+}
+
+/*
+ * destroy_dev_resources - release what create_dev_resources() created.
+ *
+ * The SRQ goes first because it was created on top of x0, c0 and p0;
+ * the XRCDs, the CQ and the PD follow.
+ */
+static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+{
+       mlx5_ib_destroy_srq(devr->s0);
+
+       mlx5_ib_dealloc_xrcd(devr->x0);
+       mlx5_ib_dealloc_xrcd(devr->x1);
+
+       mlx5_ib_destroy_cq(devr->c0);
+       mlx5_ib_dealloc_pd(devr->p0);
+}
+
+/*
+ * init_one - PCI probe callback: bring up one mlx5 IB device.
+ * @pdev: PCI device being probed
+ * @id:   matching entry of the PCI ID table (not used)
+ *
+ * Steps, in order: allocate the ib_device, initialise the core device
+ * with the selected profile, read port capabilities, allocate completion
+ * EQs, fill in the ib_device description and verb callbacks, read node
+ * data, create the internal device resources, register with the IB core,
+ * create the UMR resources and finally the sysfs attribute files.  The
+ * device is marked active only after all of that has succeeded.
+ *
+ * Returns 0 on success.  On failure every completed step is undone in
+ * reverse order and a negative errno is returned: -ENOMEM if the
+ * ib_device cannot be allocated, otherwise the error of the failing
+ * step, including ib_register_device() and device_create_file().
+ */
+static int init_one(struct pci_dev *pdev,
+                   const struct pci_device_id *id)
+{
+       struct mlx5_core_dev *mdev;
+       struct mlx5_ib_dev *dev;
+       int err;
+       int i;
+
+       printk_once(KERN_INFO "%s", mlx5_version);
+
+       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+       if (!dev)
+               return -ENOMEM;
+
+       mdev = &dev->mdev;
+       mdev->event = mlx5_ib_event;
+       if (prof_sel >= ARRAY_SIZE(profile)) {
+               pr_warn("selected pofile out of range, selceting default\n");
+               prof_sel = 0;
+       }
+       mdev->profile = &profile[prof_sel];
+       err = mlx5_dev_init(mdev, pdev);
+       if (err)
+               goto err_free;
+
+       err = get_port_caps(dev);
+       if (err)
+               goto err_cleanup;
+
+       get_ext_port_caps(dev);
+
+       err = alloc_comp_eqs(dev);
+       if (err)
+               goto err_cleanup;
+
+       MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
+
+       strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
+       dev->ib_dev.owner               = THIS_MODULE;
+       dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
+       dev->ib_dev.local_dma_lkey      = mdev->caps.reserved_lkey;
+       dev->num_ports          = mdev->caps.num_ports;
+       dev->ib_dev.phys_port_cnt     = dev->num_ports;
+       dev->ib_dev.num_comp_vectors    = dev->num_comp_vectors;
+       dev->ib_dev.dma_device  = &mdev->pdev->dev;
+
+       dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
+       dev->ib_dev.uverbs_cmd_mask     =
+               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
+               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+               (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+               (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
+               (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
+               (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+               (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
+               (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
+               (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+       dev->ib_dev.query_device        = mlx5_ib_query_device;
+       dev->ib_dev.query_port          = mlx5_ib_query_port;
+       dev->ib_dev.query_gid           = mlx5_ib_query_gid;
+       dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
+       dev->ib_dev.modify_device       = mlx5_ib_modify_device;
+       dev->ib_dev.modify_port         = mlx5_ib_modify_port;
+       dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
+       dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
+       dev->ib_dev.mmap                = mlx5_ib_mmap;
+       dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
+       dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
+       dev->ib_dev.create_ah           = mlx5_ib_create_ah;
+       dev->ib_dev.query_ah            = mlx5_ib_query_ah;
+       dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
+       dev->ib_dev.create_srq          = mlx5_ib_create_srq;
+       dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
+       dev->ib_dev.query_srq           = mlx5_ib_query_srq;
+       dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
+       dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
+       dev->ib_dev.create_qp           = mlx5_ib_create_qp;
+       dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
+       dev->ib_dev.query_qp            = mlx5_ib_query_qp;
+       dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
+       dev->ib_dev.post_send           = mlx5_ib_post_send;
+       dev->ib_dev.post_recv           = mlx5_ib_post_recv;
+       dev->ib_dev.create_cq           = mlx5_ib_create_cq;
+       dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
+       dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
+       dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
+       dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
+       dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
+       dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
+       dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
+       dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
+       dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
+       dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
+       dev->ib_dev.process_mad         = mlx5_ib_process_mad;
+       dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
+       dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
+       dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+
+       /* XRC domain verbs are exposed only when the device reports XRC. */
+       if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+               dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
+               dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
+               dev->ib_dev.uverbs_cmd_mask |=
+                       (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+                       (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+       }
+
+       err = init_node_data(dev);
+       if (err)
+               goto err_eqs;
+
+       mutex_init(&dev->cap_mask_mutex);
+       spin_lock_init(&dev->mr_lock);
+
+       err = create_dev_resources(&dev->devr);
+       if (err)
+               goto err_eqs;
+
+       /* Keep the error code: the unwind path returns err to the caller. */
+       err = ib_register_device(&dev->ib_dev, NULL);
+       if (err)
+               goto err_rsrc;
+
+       err = create_umr_res(dev);
+       if (err)
+               goto err_dev;
+
+       for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
+               err = device_create_file(&dev->ib_dev.dev,
+                                        mlx5_class_attributes[i]);
+               if (err)
+                       goto err_umrc;
+       }
+
+       dev->ib_active = true;
+
+       return 0;
+
+err_umrc:
+       destroy_umrc_res(dev);
+
+err_dev:
+       ib_unregister_device(&dev->ib_dev);
+
+err_rsrc:
+       destroy_dev_resources(&dev->devr);
+
+err_eqs:
+       free_comp_eqs(dev);
+
+err_cleanup:
+       mlx5_dev_cleanup(mdev);
+
+err_free:
+       ib_dealloc_device((struct ib_device *)dev);
+
+       return err;
+}
+
+/*
+ * remove_one - PCI remove callback.
+ *
+ * Undoes init_one() step by step: UMR resources, IB core registration,
+ * internal device resources, completion EQs, core device state and
+ * finally the ib_device allocation itself.
+ */
+static void remove_one(struct pci_dev *pdev)
+{
+       struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
+
+       destroy_umrc_res(dev);
+       ib_unregister_device(&dev->ib_dev);
+
+       destroy_dev_resources(&dev->devr);
+       free_comp_eqs(dev);
+
+       mlx5_dev_cleanup(&dev->mdev);
+       ib_dealloc_device(&dev->ib_dev);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = { /* PCI IDs this driver binds to; referenced by mlx5_ib_driver.id_table */
+       { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
+       { 0, } /* all-zero entry ends the table */
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table); /* publishes the ID table as part of the module */
+
+static struct pci_driver mlx5_ib_driver = { /* registered in mlx5_ib_init(), unregistered in mlx5_ib_cleanup() */
+       .name           = DRIVER_NAME,
+       .id_table       = mlx5_ib_pci_table,
+       .probe          = init_one, /* brings up one device */
+       .remove         = remove_one /* tears down what init_one() set up */
+};
+
+/* Module entry point: registers the PCI driver and returns its result. */
+static int __init mlx5_ib_init(void)
+{
+       int ret = pci_register_driver(&mlx5_ib_driver);
+
+       return ret;
+}
+
+/* Module exit point: unregisters the PCI driver registered at init. */
+static void __exit mlx5_ib_cleanup(void)
+{
+       pci_unregister_driver(&mlx5_ib_driver);
+}
+
+module_init(mlx5_ib_init); /* mlx5_ib_init() registers the PCI driver */
+module_exit(mlx5_ib_cleanup); /* mlx5_ib_cleanup() unregisters it */
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644 (file)
index 0000000..3a53228
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+/* Scan a umem and work out the largest power-of-two "compound page" size
+ * (a multiple of PAGE_SIZE) with which the whole region can be described.
+ *
+ * @umem: umem object to scan
+ * @addr: ib virtual address requested by the user
+ * @count: number of PAGE_SIZE pages covered by umem
+ * @shift: page shift for the compound pages found in the region
+ * @ncont: number of compound pages
+ * @order: log2 of the number of compound pages, computed after rounding
+ *         the page count up to a power of two; may be NULL if not needed
+ *
+ * For an empty umem the outputs are *count = 0, *ncont = 0,
+ * *shift = PAGE_SHIFT and *order = 0.
+ */
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+                       int *ncont, int *order)
+{
+       struct ib_umem_chunk *chunk;
+       unsigned long tmp;
+       unsigned long m;
+       int i, j, k;
+       u64 base = 0;
+       int p = 0;
+       int skip;
+       int mask;
+       u64 len;
+       u64 pfn;
+
+       /* m is the candidate compound order; start from the alignment, in
+        * pages, of the user virtual address.
+        *
+        * NOTE(review): find_first_bit() takes its size in bits, but
+        * sizeof(tmp) is a byte count, so only the low sizeof(unsigned long)
+        * bits are searched and m can never exceed that value. Widening the
+        * search to BITS_PER_LONG would let larger page shifts reach the
+        * callers and would need "1 << m" below made safe for large m;
+        * confirm both before changing it.
+        */
+       addr = addr >> PAGE_SHIFT;
+       tmp = (unsigned long)addr;
+       m = find_first_bit(&tmp, sizeof(tmp));
+       skip = 1 << m;
+       mask = skip - 1;
+       i = 0;
+       list_for_each_entry(chunk, &umem->chunk_list, list)
+               for (j = 0; j < chunk->nmap; j++) {
+                       len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
+                       /* NOTE(review): pfn is not advanced with k, so an sg
+                        * entry longer than one page is seen as a
+                        * discontinuity and drops m to 0. That is safe but
+                        * pessimistic; confirm whether it is intended.
+                        */
+                       pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
+                       for (k = 0; k < len; k++) {
+                               if (!(i & mask)) {
+                                       /* first page of a compound page: the
+                                        * DMA address must be aligned too
+                                        */
+                                       tmp = (unsigned long)pfn;
+                                       m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+                                       skip = 1 << m;
+                                       mask = skip - 1;
+                                       base = pfn;
+                                       p = 0;
+                               } else {
+                                       if (base + p != pfn) {
+                                               /* DMA discontinuity inside a
+                                                * compound page: shrink m so
+                                                * the break lands on a
+                                                * compound boundary. The new
+                                                * run starts at pfn, so its
+                                                * alignment must be honoured
+                                                * as well as that of the
+                                                * offset p; looking at p alone
+                                                * could leave a misaligned
+                                                * compound page (e.g. pfns
+                                                * 0, 1, 1, 2).
+                                                */
+                                               tmp = (unsigned long)(pfn | p);
+                                               m = find_first_bit(&tmp, sizeof(tmp));
+                                               skip = 1 << m;
+                                               mask = skip - 1;
+                                               base = pfn;
+                                               p = 0;
+                                       }
+                               }
+                               p++;
+                               i++;
+                       }
+               }
+
+       if (i) {
+               /* a compound page never needs to be larger than the region */
+               m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
+
+               if (order)
+                       *order = ilog2(roundup_pow_of_two(i) >> m);
+
+               *ncont = DIV_ROUND_UP(i, (1 << m));
+       } else {
+               m  = 0;
+
+               if (order)
+                       *order = 0;
+
+               *ncont = 0;
+       }
+       *shift = PAGE_SHIFT + m;
+       *count = i;
+}
+
+/* Fill @pas with the DMA address of every compound page of @umem.
+ *
+ * @dev: device, used here only for the debug prints
+ * @umem: umem object whose chunk list is walked
+ * @page_shift: compound page shift; one pas[] entry is written for every
+ *              1 << (page_shift - PAGE_SHIFT) PAGE_SIZE pages
+ * @pas: output array, entries are stored big-endian
+ * @umr: when non-zero the two low bits of every entry are set
+ *       (NOTE(review): presumably flag bits expected by the UMR path;
+ *       confirm against the caller)
+ */
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                         int page_shift, __be64 *pas, int umr)
+{
+       int shift = page_shift - PAGE_SHIFT;
+       int mask = (1 << shift) - 1;
+       struct ib_umem_chunk *chunk;
+       int i, j, k;
+       u64 cur = 0;
+       u64 base;
+       int len;
+
+       i = 0;
+       list_for_each_entry(chunk, &umem->chunk_list, list)
+               for (j = 0; j < chunk->nmap; j++) {
+                       len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
+                       base = sg_dma_address(&chunk->page_list[j]);
+                       for (k = 0; k < len; k++) {
+                               /* widen k before shifting: the byte offset
+                                * of a page inside a large sg entry does
+                                * not fit in an int
+                                */
+                               cur = base + ((u64)k << PAGE_SHIFT);
+                               if (!(i & mask)) {
+                                       /* first page of a compound page */
+                                       if (umr)
+                                               cur |= 3;
+
+                                       pas[i >> shift] = cpu_to_be64(cur);
+                                       mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+                                                   i >> shift, be64_to_cpu(pas[i >> shift]));
+                               }  else
+                                       mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+                                                   cur);
+                               i++;
+                       }
+               }
+}
+
+/* Express the offset of @addr inside its (1 << @page_shift)-byte page in
+ * units of 1/64th of that page.
+ *
+ * @addr: address to examine
+ * @page_shift: log2 of the page size; assumed to be at least 6 so that the
+ *              1/64th unit is non-zero (callers are not visible here)
+ * @offset: on success receives the offset in those units (0..63)
+ *
+ * Returns 0 on success, or -EINVAL if the in-page offset is not a multiple
+ * of the unit; *offset is left untouched in that case.
+ */
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
+{
+       u64 page_size;
+       u64 page_mask;
+       u64 off_size;
+       u64 off_mask;
+       u64 buf_off;
+
+       /* shift a 64-bit one: "1 << page_shift" is an int shift and is
+        * undefined once page_shift reaches the width of int
+        */
+       page_size = 1ULL << page_shift;
+       page_mask = page_size - 1;
+       buf_off = addr & page_mask;
+       off_size = page_size >> 6;
+       off_mask = off_size - 1;
+
+       if (buf_off & off_mask)
+               return -EINVAL;
+
+       *offset = buf_off >> ilog2(off_size);
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644 (file)
index 0000000..836be91
--- /dev/null
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_H
+#define MLX5_IB_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/types.h>
+
+#define mlx5_ib_dbg(dev, format, arg...)                               \
+pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,   \
+        __LINE__, current->pid, ##arg) /* pr_debug prefixed with ib device name, function, line and current pid; dev must have an ib_dev member */
+
+#define mlx5_ib_err(dev, format, arg...)                               \
+pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,     \
+       __LINE__, current->pid, ##arg) /* same prefix as mlx5_ib_dbg, emitted through pr_err */
+
+#define mlx5_ib_warn(dev, format, arg...)                              \
+pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,    \
+       __LINE__, current->pid, ##arg) /* same prefix as mlx5_ib_dbg, emitted through pr_warn */
+
+enum {  /* NOTE(review): presumably locate a command field inside the mmap offset; users are not visible here */
+       MLX5_IB_MMAP_CMD_SHIFT  = 8,
+       MLX5_IB_MMAP_CMD_MASK   = 0xff,
+};
+
+enum mlx5_ib_mmap_cmd {
+       MLX5_IB_MMAP_REGULAR_PAGE               = 0,
+       MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES       = 1, /* always last */
+};
+
+enum {  /* NOTE(review): RES_/REQ_ presumably mean responder/requester, 32/64 the scattered data size; confirm against users */
+       MLX5_RES_SCAT_DATA32_CQE        = 0x1,
+       MLX5_RES_SCAT_DATA64_CQE        = 0x2,
+       MLX5_REQ_SCAT_DATA32_CQE        = 0x11,
+       MLX5_REQ_SCAT_DATA64_CQE        = 0x22,
+};
+
+enum mlx5_ib_latency_class {    /* implicit values 0..3 in declaration order */
+       MLX5_IB_LATENCY_CLASS_LOW,
+       MLX5_IB_LATENCY_CLASS_MEDIUM,
+       MLX5_IB_LATENCY_CLASS_HIGH,
+       MLX5_IB_LATENCY_CLASS_FAST_PATH
+};
+
+enum mlx5_ib_mad_ifc_flags {    /* distinct single-bit values, so they can be OR-ed together */
+       MLX5_MAD_IFC_IGNORE_MKEY        = 1,
+       MLX5_MAD_IFC_IGNORE_BKEY        = 2,
+       MLX5_MAD_IFC_NET_VIEW           = 4,
+};
+
+struct mlx5_ib_ucontext {       /* driver-private wrapper around struct ib_ucontext */
+       struct ib_ucontext      ibucontext;     /* embedded core object, see to_mucontext() */
+       struct list_head        db_page_list;
+
+       /* protects doorbell record alloc/free
+        */
+       struct mutex            db_page_mutex;
+       struct mlx5_uuar_info   uuari;
+};
+
+static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)     /* ib_ucontext -> enclosing mlx5_ib_ucontext */
+{
+       return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
+}
+
+struct mlx5_ib_pd {     /* driver-private wrapper around struct ib_pd */
+       struct ib_pd            ibpd;   /* embedded core object, see to_mpd() */
+       u32                     pdn;
+       u32                     pa_lkey;
+};
+
+/* Use macros here so that we don't have to duplicate
+ * enum ib_send_flags and enum ib_qp_type for the low-level driver;
+ * each name below aliases a value those enums reserve.
+ */
+
+#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
+#define MLX5_IB_QPT_REG_UMR    IB_QPT_RESERVED1
+#define MLX5_IB_WR_UMR         IB_WR_RESERVED1
+
+struct wr_list {        /* element type of mlx5_ib_wq.w_list */
+       u16     opcode;
+       u16     next;
+};
+
+struct mlx5_ib_wq {     /* state of one work queue; struct mlx5_ib_qp embeds two, rq and sq */
+       u64                    *wrid;
+       u32                    *wr_data;
+       struct wr_list         *w_list;
+       unsigned               *wqe_head;
+       u16                     unsig_count;
+
+       /* serializes posting to the work queue
+        */
+       spinlock_t              lock;
+       int                     wqe_cnt;
+       int                     max_post;
+       int                     max_gs;
+       int                     offset;
+       int                     wqe_shift;
+       unsigned                head;
+       unsigned                tail;
+       u16                     cur_post;
+       u16                     last_poll;
+       void                   *qend;
+};
+
+enum {  /* NOTE(review): presumably the values stored in mlx5_ib_qp.create_type; confirm in qp.c */
+       MLX5_QP_USER,
+       MLX5_QP_KERNEL,
+       MLX5_QP_EMPTY
+};
+
+struct mlx5_ib_qp {     /* driver-private QP: wraps both the ib_qp and the core QP object */
+       struct ib_qp            ibqp;   /* embedded, see to_mqp() */
+       struct mlx5_core_qp     mqp;    /* embedded, see to_mibqp() */
+       struct mlx5_buf         buf;
+
+       struct mlx5_db          db;
+       struct mlx5_ib_wq       rq;
+
+       u32                     doorbell_qpn;
+       u8                      sq_signal_bits;
+       u8                      fm_cache;
+       int                     sq_max_wqes_per_wr;
+       int                     sq_spare_wqes;
+       struct mlx5_ib_wq       sq;
+
+       struct ib_umem         *umem;
+       int                     buf_size;
+
+       /* serializes qp state modifications
+        */
+       struct mutex            mutex;
+       u16                     xrcdn;
+       u32                     flags;
+       u8                      port;
+       u8                      alt_port;
+       u8                      atomic_rd_en;
+       u8                      resp_depth;
+       u8                      state;
+       int                     mlx_type;
+       int                     wq_sig;
+       int                     scat_cqe;
+       int                     max_inline_data;
+       struct mlx5_bf         *bf;
+       int                     has_rq;
+
+       /* only for user space QPs. For kernel QPs
+        * we have it from the bf object
+        */
+       int                     uuarn;
+
+       int                     create_type;
+       u32                     pa_lkey;
+};
+
+struct mlx5_ib_cq_buf { /* embedded in struct mlx5_ib_cq as its buf member */
+       struct mlx5_buf         buf;
+       struct ib_umem          *umem;
+       int                     cqe_size;
+};
+
+enum mlx5_ib_qp_flags { /* single-bit flags; NOTE(review): presumably kept in mlx5_ib_qp.flags, confirm */
+       MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
+       MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
+};
+
+struct mlx5_shared_mr_info {    /* pointed to by mlx5_ib_mr.smr_info */
+       int mr_id;
+       struct ib_umem          *umem;
+};
+
+struct mlx5_ib_cq {     /* driver-private CQ: wraps both the ib_cq and the core CQ object */
+       struct ib_cq            ibcq;   /* embedded, see to_mcq() */
+       struct mlx5_core_cq     mcq;    /* embedded, see to_mibcq() */
+       struct mlx5_ib_cq_buf   buf;
+       struct mlx5_db          db;
+
+       /* serializes access to the CQ
+        */
+       spinlock_t              lock;
+
+       /* protects CQ resize
+        */
+       struct mutex            resize_mutex;
+       struct mlx5_ib_cq_resize *resize_buf;
+       struct ib_umem         *resize_umem;
+       int                     cqe_size;
+};
+
+struct mlx5_ib_srq {    /* driver-private SRQ: wraps both the ib_srq and the core SRQ object */
+       struct ib_srq           ibsrq;  /* embedded, see to_msrq() */
+       struct mlx5_core_srq    msrq;   /* embedded, see to_mibsrq() */
+       struct mlx5_buf         buf;
+       struct mlx5_db          db;
+       u64                    *wrid;
+       /* protects SRQ handling
+        */
+       spinlock_t              lock;
+       int                     head;
+       int                     tail;
+       u16                     wqe_ctr;
+       struct ib_umem         *umem;
+       /* serializes arming an SRQ
+        */
+       struct mutex            mutex;
+       int                     wq_sig;
+};
+
+struct mlx5_ib_xrcd {   /* driver-private wrapper around struct ib_xrcd */
+       struct ib_xrcd          ibxrcd; /* embedded, see to_mxrcd() */
+       u32                     xrcdn;
+};
+
+struct mlx5_ib_mr {     /* driver-private memory region */
+       struct ib_mr            ibmr;   /* embedded, see to_mmr() */
+       struct mlx5_core_mr     mmr;
+       struct ib_umem         *umem;
+       struct mlx5_shared_mr_info      *smr_info;
+       struct list_head        list;
+       int                     order;
+       int                     umred;
+       __be64                  *pas;
+       dma_addr_t              dma;
+       int                     npages;
+       struct completion       done;
+       enum ib_wc_status       status;
+};
+
+struct mlx5_ib_fast_reg_page_list {     /* driver-private wrapper around struct ib_fast_reg_page_list */
+       struct ib_fast_reg_page_list    ibfrpl; /* embedded, see to_mfrpl() */
+       __be64                         *mapped_page_list;
+       dma_addr_t                      map;
+};
+
+struct umr_common {     /* embedded in struct mlx5_ib_dev as its umrc member */
+       struct ib_pd    *pd;
+       struct ib_cq    *cq;
+       struct ib_qp    *qp;
+       struct ib_mr    *mr;
+       /* controls access to the UMR QP
+        */
+       struct semaphore        sem;
+};
+
+enum {  /* NOTE(review): presumably the values of mlx5_ib_fmr.state; users are not visible here */
+       MLX5_FMR_INVALID,
+       MLX5_FMR_VALID,
+       MLX5_FMR_BUSY,
+};
+
+struct mlx5_ib_fmr {    /* driver-private wrapper around struct ib_fmr */
+       struct ib_fmr                   ibfmr;  /* embedded, see to_mfmr() */
+       struct mlx5_core_mr             mr;
+       int                             access_flags;
+       int                             state;
+       /* protects fmr state
+        */
+       spinlock_t                      lock;
+       u64                             wrid;
+       struct ib_send_wr               wr[2];
+       u8                              page_shift;
+       struct ib_fast_reg_page_list    page_list;
+};
+
+struct mlx5_cache_ent { /* one slot of mlx5_mr_cache.ent[] */
+       struct list_head        head;
+       /* syncs access to the cache entry
+        */
+       spinlock_t              lock;
+
+
+       struct dentry          *dir;
+       char                    name[4];
+       u32                     order;
+       u32                     size;
+       u32                     cur;
+       u32                     miss;
+       u32                     limit;
+
+       struct dentry          *fsize;  /* NOTE(review): fsize/fcur/fmiss/flimit presumably are debugfs files for the like-named counters; confirm in mr.c */
+       struct dentry          *fcur;
+       struct dentry          *fmiss;
+       struct dentry          *flimit;
+
+       struct mlx5_ib_dev     *dev;
+       struct work_struct      work;
+       struct delayed_work     dwork;
+};
+
+struct mlx5_mr_cache {  /* embedded in struct mlx5_ib_dev as its cache member */
+       struct workqueue_struct *wq;
+       struct mlx5_cache_ent   ent[MAX_MR_CACHE_ENTRIES];
+       int                     stopped;
+       struct dentry           *root;
+       unsigned long           last_add;
+};
+
+struct mlx5_ib_resources {      /* embedded in struct mlx5_ib_dev as devr: one CQ, two XRCDs, one PD, one SRQ */
+       struct ib_cq    *c0;
+       struct ib_xrcd  *x0;
+       struct ib_xrcd  *x1;
+       struct ib_pd    *p0;
+       struct ib_srq   *s0;
+};
+
+struct mlx5_ib_dev {    /* per-device driver state: wraps both the ib_device and the core device */
+       struct ib_device                ib_dev; /* embedded, see to_mdev() */
+       struct mlx5_core_dev            mdev;   /* embedded, see mlx5_core2ibdev() */
+       MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
+       struct list_head                eqs_list;
+       int                             num_ports;
+       int                             num_comp_vectors;
+       /* serializes updates of the capability mask
+        */
+       struct mutex                    cap_mask_mutex;
+       bool                            ib_active;
+       struct umr_common               umrc;
+       /* syncs used page count stats
+        */
+       spinlock_t                      mr_lock;
+       struct mlx5_ib_resources        devr;
+       struct mlx5_mr_cache            cache;
+};
+
+static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)     /* core CQ -> enclosing mlx5_ib_cq */
+{
+       return container_of(mcq, struct mlx5_ib_cq, mcq);
+}
+
+static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)     /* ib_xrcd -> enclosing mlx5_ib_xrcd */
+{
+       return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
+}
+
+static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)      /* ib_device -> enclosing mlx5_ib_dev */
+{
+       return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
+}
+
+static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) /* ib_fmr -> enclosing mlx5_ib_fmr */
+{
+       return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
+}
+
+static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)     /* ib_cq -> enclosing mlx5_ib_cq */
+{
+       return container_of(ibcq, struct mlx5_ib_cq, ibcq);
+}
+
+static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)     /* core QP -> enclosing mlx5_ib_qp */
+{
+       return container_of(mqp, struct mlx5_ib_qp, mqp);
+}
+
+static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)     /* ib_pd -> enclosing mlx5_ib_pd */
+{
+       return container_of(ibpd, struct mlx5_ib_pd, ibpd);
+}
+
+static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) /* ib_srq -> enclosing mlx5_ib_srq */
+{
+       return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
+}
+
+static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)     /* ib_qp -> enclosing mlx5_ib_qp */
+{
+       return container_of(ibqp, struct mlx5_ib_qp, ibqp);
+}
+
+static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) /* core SRQ -> enclosing mlx5_ib_srq */
+{
+       return container_of(msrq, struct mlx5_ib_srq, msrq);
+}
+
+static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)     /* ib_mr -> enclosing mlx5_ib_mr */
+{
+       return container_of(ibmr, struct mlx5_ib_mr, ibmr);
+}
+
+static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) /* ib_fast_reg_page_list -> enclosing driver wrapper */
+{
+       return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
+}
+
+struct mlx5_ib_ah {     /* driver-private wrapper around struct ib_ah */
+       struct ib_ah            ibah;   /* embedded, see to_mah() */
+       struct mlx5_av          av;
+};
+
+static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)     /* ib_ah -> enclosing mlx5_ib_ah */
+{
+       return container_of(ibah, struct mlx5_ib_ah, ibah);
+}
+
+static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)    /* core device -> enclosing mlx5_ib_dev (its mdev member) */
+{
+       return container_of(dev, struct mlx5_ib_dev, mdev);
+}
+
+static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)  /* pci_dev -> core device -> enclosing mlx5_ib_dev */
+{
+       return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
+}
+
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+                       struct mlx5_db *db);
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+                int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+                void *in_mad, void *response_mad);
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+                          struct mlx5_ib_ah *ah);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx5_ib_destroy_ah(struct ib_ah *ah);
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+                                 struct ib_srq_init_attr *init_attr,
+                                 struct ib_udata *udata);
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+                      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
+int mlx5_ib_destroy_srq(struct ib_srq *srq);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+                         struct ib_recv_wr **bad_wr);
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata);
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+                    struct ib_qp_init_attr *qp_init_attr);
+int mlx5_ib_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                     struct ib_send_wr **bad_wr);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+                     struct ib_recv_wr **bad_wr);
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+                               int vector, struct ib_ucontext *context,
+                               struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq);
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                 u64 virt_addr, int access_flags,
+                                 struct ib_udata *udata);
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+                                       int max_page_list_len);
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+                                                              int page_list_len);
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
+                                struct ib_fmr_attr *fmr_attr);
+int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+                     int npages, u64 iova);
+int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
+int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+                       struct ib_wc *in_wc, struct ib_grh *in_grh,
+                       struct ib_mad *in_mad, struct ib_mad *out_mad);
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+                                         struct ib_ucontext *context,
+                                         struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+                      struct ib_port_attr *props);
+int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+                       int *ncont, int *order);
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                         int page_shift, __be64 *pas, int umr);
+void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+
+static inline void init_query_mad(struct ib_smp *mad)   /* preset four header fields of @mad for a LID-routed subnet-management Get; nothing else is written */
+{
+       mad->base_version  = 1;
+       mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+       mad->class_version = 1;
+       mad->method        = IB_MGMT_METHOD_GET;
+}
+
+/* Translate IB_ACCESS_* bits in @acc into the matching MLX5_PERM_* mask.
+ * Local read permission is always part of the result; every other
+ * permission is added only when its IB_ACCESS_* bit is set.
+ */
+static inline u8 convert_access(int acc)
+{
+       u8 perm = MLX5_PERM_LOCAL_READ;
+
+       if (acc & IB_ACCESS_REMOTE_ATOMIC)
+               perm |= MLX5_PERM_ATOMIC;
+       if (acc & IB_ACCESS_REMOTE_WRITE)
+               perm |= MLX5_PERM_REMOTE_WRITE;
+       if (acc & IB_ACCESS_REMOTE_READ)
+               perm |= MLX5_PERM_REMOTE_READ;
+       if (acc & IB_ACCESS_LOCAL_WRITE)
+               perm |= MLX5_PERM_LOCAL_WRITE;
+
+       return perm;
+}
+
+#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644 (file)
index 0000000..e2daa8f
--- /dev/null
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/kref.h>
+#include <linux/random.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+/*
+ * NOTE(review): by its name, the default MR cache size used when the device
+ * profile supplies none; confirm it is still consumed by
+ * mlx5_mr_cache_init().
+ */
+enum {
+       DEF_CACHE_SIZE  = 10,
+};
+
+/*
+ * Round @ptr up to the next multiple of @align.  The mask arithmetic only
+ * works when @align is a power of two.
+ */
+static __be64 *mr_align(__be64 *ptr, int align)
+{
+       unsigned long low_bits = align - 1;
+       unsigned long addr = (unsigned long)ptr;
+
+       addr += low_bits;
+       addr &= ~low_bits;
+
+       return (__be64 *)addr;
+}
+
+/*
+ * Convert an MR order into an index into dev->cache.ent[].  Orders smaller
+ * than the order of the first entry map to index 0; any other order maps to
+ * its distance from the first entry's order.  No upper bound is enforced
+ * here.
+ */
+static int order2idx(struct mlx5_ib_dev *dev, int order)
+{
+       int base = dev->cache.ent[0].order;
+
+       return order < base ? 0 : order - base;
+}
+
+/*
+ * add_keys - create @num new mkeys and append them to cache entry @c.
+ *
+ * Every MR gets a page-address buffer large enough for 2^order u64 entries,
+ * rounded up to a multiple of 64 bytes.  The rounded length is what gets
+ * DMA-mapped, so it matches the ALIGN(..., 0x40) length that remove_keys()
+ * and clean_keys() pass to dma_unmap_single().
+ *
+ * Returns 0 on success, -ENOMEM when an allocation or the DMA mapping
+ * fails, or the error returned by mlx5_core_create_mkey().  MRs that were
+ * already added before a failure stay in the cache.
+ */
+static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+       struct device *ddev = dev->ib_dev.dma_device;
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_cache_ent *ent = &cache->ent[c];
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_ib_mr *mr;
+       int npages = 1 << ent->order;
+       /* map and unmap must use the same, 64-byte rounded, length */
+       int size = ALIGN(sizeof(u64) * npages, 0x40);
+       int err = 0;
+       int i;
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       for (i = 0; i < num; i++) {
+               mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+               if (!mr) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               mr->order = ent->order;
+               mr->umred = 1;
+               /* 0x3f bytes of slack so the buffer can be aligned to 0x40 */
+               mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
+               if (!mr->pas) {
+                       kfree(mr);
+                       err = -ENOMEM;
+                       goto out;
+               }
+               mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
+                                        DMA_TO_DEVICE);
+               if (dma_mapping_error(ddev, mr->dma)) {
+                       kfree(mr->pas);
+                       kfree(mr);
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               /*
+                * The mailbox is re-initialised on every iteration.
+                * NOTE(review): presumably because mlx5_core_create_mkey()
+                * may modify it - confirm before hoisting out of the loop.
+                */
+               in->seg.status = 1 << 6;
+               in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
+               in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+               in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
+               in->seg.log2_page_size = 12;
+
+               err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+                                           sizeof(*in));
+               if (err) {
+                       mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+                       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+                       kfree(mr->pas);
+                       kfree(mr);
+                       goto out;
+               }
+               /* timestamp consulted by the cache worker before shrinking */
+               cache->last_add = jiffies;
+
+               spin_lock(&ent->lock);
+               list_add_tail(&mr->list, &ent->head);
+               ent->cur++;
+               ent->size++;
+               spin_unlock(&ent->lock);
+       }
+
+out:
+       kfree(in);
+       return err;
+}
+
+/*
+ * remove_keys - take up to @num MRs off the head of cache entry @c and
+ * destroy them.
+ *
+ * Stops early when the entry's list becomes empty.  When destroying the
+ * mkey fails, a warning is printed and the MR's memory and DMA mapping are
+ * left untouched.
+ */
+static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+       struct mlx5_cache_ent *ent = &dev->cache.ent[c];
+       struct device *ddev = dev->ib_dev.dma_device;
+       struct mlx5_ib_mr *victim;
+       int left;
+
+       for (left = num; left > 0; left--) {
+               spin_lock(&ent->lock);
+               if (list_empty(&ent->head)) {
+                       spin_unlock(&ent->lock);
+                       break;
+               }
+               victim = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&victim->list);
+               ent->cur--;
+               ent->size--;
+               spin_unlock(&ent->lock);
+
+               if (mlx5_core_destroy_mkey(&dev->mdev, &victim->mmr)) {
+                       mlx5_ib_warn(dev, "failed destroy mkey\n");
+                       continue;
+               }
+
+               dma_unmap_single(ddev, victim->dma,
+                                ALIGN(sizeof(u64) * (1 << victim->order), 0x40),
+                                DMA_TO_DEVICE);
+               kfree(victim->pas);
+               kfree(victim);
+       }
+}
+
+/*
+ * size_write - debugfs "size" write handler for one cache entry.
+ *
+ * Parses an unsigned decimal from the user buffer and grows or shrinks the
+ * entry to that many MRs.  Values below the entry's limit are rejected.
+ *
+ * Only the bytes the caller actually supplied are copied (at most
+ * sizeof(lbuf) - 1), and the local copy is NUL-terminated right after them,
+ * so neither the user buffer nor stale stack bytes are over-read.
+ *
+ * Returns @count on success, -EFAULT when the user buffer cannot be read,
+ * -EINVAL on a malformed or too small value, or the add_keys() error.
+ */
+static ssize_t size_write(struct file *filp, const char __user *buf,
+                         size_t count, loff_t *pos)
+{
+       struct mlx5_cache_ent *ent = filp->private_data;
+       struct mlx5_ib_dev *dev = ent->dev;
+       char lbuf[20];
+       size_t len;
+       u32 var;
+       int err;
+       int c;
+
+       len = min_t(size_t, count, sizeof(lbuf) - 1);
+       if (copy_from_user(lbuf, buf, len))
+               return -EFAULT;
+       lbuf[len] = 0;
+
+       c = order2idx(dev, ent->order);
+
+       if (sscanf(lbuf, "%u", &var) != 1)
+               return -EINVAL;
+
+       if (var < ent->limit)
+               return -EINVAL;
+
+       if (var > ent->size) {
+               err = add_keys(dev, c, var - ent->size);
+               if (err)
+                       return err;
+       } else if (var < ent->size) {
+               remove_keys(dev, c, ent->size - var);
+       }
+
+       return count;
+}
+
+/*
+ * size_read - debugfs "size" read handler: reports ent->size as a decimal
+ * line.
+ *
+ * The copy to user space goes through simple_read_from_buffer(), which
+ * honours the caller's @count and *@pos and reports -EFAULT on a bad user
+ * pointer.
+ *
+ * Returns the number of bytes copied, 0 at end of data, or a negative
+ * errno.
+ */
+static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
+                        loff_t *pos)
+{
+       struct mlx5_cache_ent *ent = filp->private_data;
+       char lbuf[20];
+       int err;
+
+       err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+       if (err < 0)
+               return err;
+
+       return simple_read_from_buffer(buf, count, pos, lbuf, err);
+}
+
+/*
+ * File operations of the per-entry "size" debugfs file created in
+ * mlx5_mr_cache_debugfs_init(); private_data is the cache entry.
+ */
+static const struct file_operations size_fops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .write  = size_write,
+       .read   = size_read,
+};
+
+/*
+ * limit_write - debugfs "limit" write handler for one cache entry.
+ *
+ * Parses an unsigned decimal and stores it as the entry's limit; values
+ * larger than the entry's current size are rejected.  When the entry then
+ * holds fewer MRs than the new limit, keys are added to bring it up to
+ * twice the limit.
+ *
+ * Only the bytes the caller actually supplied are copied (at most
+ * sizeof(lbuf) - 1), and the local copy is NUL-terminated right after them.
+ *
+ * Returns @count on success, -EFAULT when the user buffer cannot be read,
+ * -EINVAL on a malformed or too large value, or the add_keys() error.
+ */
+static ssize_t limit_write(struct file *filp, const char __user *buf,
+                          size_t count, loff_t *pos)
+{
+       struct mlx5_cache_ent *ent = filp->private_data;
+       struct mlx5_ib_dev *dev = ent->dev;
+       char lbuf[20];
+       size_t len;
+       u32 var;
+       int err;
+       int c;
+
+       len = min_t(size_t, count, sizeof(lbuf) - 1);
+       if (copy_from_user(lbuf, buf, len))
+               return -EFAULT;
+       lbuf[len] = 0;
+
+       c = order2idx(dev, ent->order);
+
+       if (sscanf(lbuf, "%u", &var) != 1)
+               return -EINVAL;
+
+       if (var > ent->size)
+               return -EINVAL;
+
+       ent->limit = var;
+
+       if (ent->cur < ent->limit) {
+               err = add_keys(dev, c, 2 * ent->limit - ent->cur);
+               if (err)
+                       return err;
+       }
+
+       return count;
+}
+
+/*
+ * limit_read - debugfs "limit" read handler: reports ent->limit as a
+ * decimal line.
+ *
+ * The copy to user space goes through simple_read_from_buffer(), which
+ * honours the caller's @count and *@pos and reports -EFAULT on a bad user
+ * pointer.
+ *
+ * Returns the number of bytes copied, 0 at end of data, or a negative
+ * errno.
+ */
+static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
+                         loff_t *pos)
+{
+       struct mlx5_cache_ent *ent = filp->private_data;
+       char lbuf[20];
+       int err;
+
+       err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
+       if (err < 0)
+               return err;
+
+       return simple_read_from_buffer(buf, count, pos, lbuf, err);
+}
+
+/*
+ * File operations of the per-entry "limit" debugfs file created in
+ * mlx5_mr_cache_debugfs_init(); private_data is the cache entry.
+ */
+static const struct file_operations limit_fops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .write  = limit_write,
+       .read   = limit_read,
+};
+
+/*
+ * Return 1 when at least one cache entry currently holds fewer MRs than its
+ * limit, 0 otherwise.
+ */
+static int someone_adding(struct mlx5_mr_cache *cache)
+{
+       int idx = 0;
+
+       while (idx < MAX_MR_CACHE_ENTRIES) {
+               if (cache->ent[idx].cur < cache->ent[idx].limit)
+                       return 1;
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Common body of the immediate and the delayed cache work items of one
+ * cache entry.
+ *
+ * Does nothing once cache->stopped is set.  Otherwise:
+ *  - below twice the limit: add one key and re-queue itself while the entry
+ *    is still below twice the limit;
+ *  - above twice the limit: remove one key, but only if no entry is below
+ *    its limit and the last key was added more than 60 seconds ago;
+ *    otherwise try again through the delayed work in 60 seconds.
+ *
+ * ent->cur and ent->limit are read here without taking ent->lock.
+ */
+static void __cache_work_func(struct mlx5_cache_ent *ent)
+{
+       struct mlx5_ib_dev *dev = ent->dev;
+       struct mlx5_mr_cache *cache = &dev->cache;
+       int i = order2idx(dev, ent->order);
+
+       if (cache->stopped)
+               return;
+
+       /* look the entry up again through its cache index */
+       ent = &dev->cache.ent[i];
+       if (ent->cur < 2 * ent->limit) {
+               /* the result of add_keys() is not checked; cur is re-read */
+               add_keys(dev, i, 1);
+               if (ent->cur < 2 * ent->limit)
+                       queue_work(cache->wq, &ent->work);
+       } else if (ent->cur > 2 * ent->limit) {
+               if (!someone_adding(cache) &&
+                   time_after(jiffies, cache->last_add + 60 * HZ)) {
+                       remove_keys(dev, i, 1);
+                       if (ent->cur > ent->limit)
+                               queue_work(cache->wq, &ent->work);
+               } else {
+                       /* too early to shrink: re-evaluate in 60 seconds */
+                       queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+               }
+       }
+}
+
+/* Delayed-work entry point: resolve the owning cache entry and run it. */
+static void delayed_cache_work_func(struct work_struct *work)
+{
+       __cache_work_func(container_of(work, struct mlx5_cache_ent,
+                                      dwork.work));
+}
+
+/* Work entry point: resolve the owning cache entry and run it. */
+static void cache_work_func(struct work_struct *work)
+{
+       __cache_work_func(container_of(work, struct mlx5_cache_ent, work));
+}
+
+/*
+ * alloc_cached_mr - take an MR for @order out of the cache.
+ *
+ * Starts at the entry matching @order and walks towards larger orders until
+ * an entry with a cached MR is found.  Every empty entry visited gets its
+ * work item queued; the entry that supplied the MR gets it queued when it
+ * dropped below its limit.
+ *
+ * Returns the MR, or NULL when the index is out of range or every visited
+ * entry was empty (in which case the miss counter of the starting entry is
+ * incremented).
+ */
+static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_ib_mr *mr = NULL;
+       struct mlx5_cache_ent *ent;
+       int first = order2idx(dev, order);
+       int idx;
+
+       if (first < 0 || first >= MAX_MR_CACHE_ENTRIES) {
+               mlx5_ib_warn(dev, "order %d, cache index %d\n", order, first);
+               return NULL;
+       }
+
+       for (idx = first; !mr && idx < MAX_MR_CACHE_ENTRIES; idx++) {
+               ent = &cache->ent[idx];
+
+               mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, idx);
+
+               spin_lock(&ent->lock);
+               if (list_empty(&ent->head)) {
+                       spin_unlock(&ent->lock);
+                       /* nothing cached here: kick the worker, try next */
+                       queue_work(cache->wq, &ent->work);
+                       continue;
+               }
+
+               mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&mr->list);
+               ent->cur--;
+               spin_unlock(&ent->lock);
+
+               if (ent->cur < ent->limit)
+                       queue_work(cache->wq, &ent->work);
+       }
+
+       if (!mr)
+               cache->ent[first].miss++;
+
+       return mr;
+}
+
+/*
+ * free_cached_mr - give @mr back to the cache entry matching its order.
+ *
+ * The MR is appended to the entry's list.  When the entry then holds more
+ * than twice its limit, the entry's work item is queued.  An out-of-range
+ * cache index only produces a warning.
+ */
+static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_cache_ent *ent;
+       int over_full;
+       int idx = order2idx(dev, mr->order);
+
+       if (idx < 0 || idx >= MAX_MR_CACHE_ENTRIES) {
+               mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, idx);
+               return;
+       }
+
+       ent = &cache->ent[idx];
+
+       spin_lock(&ent->lock);
+       list_add_tail(&mr->list, &ent->head);
+       ent->cur++;
+       over_full = ent->cur > 2 * ent->limit;
+       spin_unlock(&ent->lock);
+
+       if (over_full)
+               queue_work(cache->wq, &ent->work);
+}
+
+/*
+ * clean_keys - destroy every MR held by cache entry @c.
+ *
+ * MRs are unlinked one at a time under the entry lock and destroyed outside
+ * of it.  When destroying the mkey fails, a warning is printed and the MR's
+ * memory and DMA mapping are left untouched.
+ */
+static void clean_keys(struct mlx5_ib_dev *dev, int c)
+{
+       struct mlx5_cache_ent *ent = &dev->cache.ent[c];
+       struct device *ddev = dev->ib_dev.dma_device;
+       struct mlx5_ib_mr *victim;
+
+       for (;;) {
+               spin_lock(&ent->lock);
+               if (list_empty(&ent->head)) {
+                       spin_unlock(&ent->lock);
+                       break;
+               }
+               victim = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+               list_del(&victim->list);
+               ent->cur--;
+               ent->size--;
+               spin_unlock(&ent->lock);
+
+               if (mlx5_core_destroy_mkey(&dev->mdev, &victim->mmr)) {
+                       mlx5_ib_warn(dev, "failed destroy mkey\n");
+                       continue;
+               }
+
+               dma_unmap_single(ddev, victim->dma,
+                                ALIGN(sizeof(u64) * (1 << victim->order), 0x40),
+                                DMA_TO_DEVICE);
+               kfree(victim->pas);
+               kfree(victim);
+       }
+}
+
+/*
+ * Create the "mr_cache" debugfs directory below the device's debug root,
+ * with one sub-directory per cache entry (named after the entry's order)
+ * containing the files "size", "limit", "cur" and "miss".
+ *
+ * Returns 0 when mlx5_debugfs_root is not set or everything was created,
+ * and -ENOMEM as soon as one debugfs object cannot be created.
+ */
+static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+       int idx;
+
+       if (!mlx5_debugfs_root)
+               return 0;
+
+       cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
+       if (!cache->root)
+               return -ENOMEM;
+
+       for (idx = 0; idx < MAX_MR_CACHE_ENTRIES; idx++) {
+               struct mlx5_cache_ent *ent = &cache->ent[idx];
+
+               /* per-entry directory, named after the entry's order */
+               sprintf(ent->name, "%d", ent->order);
+               ent->dir = debugfs_create_dir(ent->name,  cache->root);
+               if (!ent->dir)
+                       return -ENOMEM;
+
+               /* writable knobs */
+               ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
+                                                &size_fops);
+               if (!ent->fsize)
+                       return -ENOMEM;
+
+               ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
+                                                 &limit_fops);
+               if (!ent->flimit)
+                       return -ENOMEM;
+
+               /* plain counters */
+               ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
+                                              &ent->cur);
+               if (!ent->fcur)
+                       return -ENOMEM;
+
+               ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
+                                               &ent->miss);
+               if (!ent->fmiss)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Remove the whole "mr_cache" debugfs tree; nothing to do when
+ * mlx5_debugfs_root is not set.
+ */
+static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+       if (mlx5_debugfs_root)
+               debugfs_remove_recursive(dev->cache.root);
+}
+
+/*
+ * mlx5_mr_cache_init - set up the MR cache of @dev.
+ *
+ * Creates the single-threaded "mkey_cache" workqueue, initialises every
+ * cache entry (entry i serves order i + 2) and queues each entry's work
+ * item once.  The per-entry limit comes from the device profile when the
+ * profile has MLX5_PROF_MASK_MR_CACHE set and is 0 otherwise.
+ *
+ * A debugfs setup failure is only reported with a warning.
+ *
+ * Returns 0 on success or -ENOMEM when the workqueue cannot be created.
+ */
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_cache_ent *ent;
+       int err;
+       int i;
+
+       cache->wq = create_singlethread_workqueue("mkey_cache");
+       if (!cache->wq) {
+               mlx5_ib_warn(dev, "failed to create work queue\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+               ent = &cache->ent[i];
+               INIT_LIST_HEAD(&ent->head);
+               spin_lock_init(&ent->lock);
+               ent->order = i + 2;
+               ent->dev = dev;
+
+               if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
+                       ent->limit = dev->mdev.profile->mr_cache[i].limit;
+               else
+                       ent->limit = 0;
+
+               INIT_WORK(&ent->work, cache_work_func);
+               INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+               queue_work(cache->wq, &ent->work);
+       }
+
+       err = mlx5_mr_cache_debugfs_init(dev);
+       if (err)
+               mlx5_ib_warn(dev, "cache debugfs failure\n");
+
+       return 0;
+}
+
+/*
+ * mlx5_mr_cache_cleanup - tear down the MR cache of @dev.
+ *
+ * The cache is first marked as stopped so that the work functions return
+ * without re-arming themselves.  The workqueue is then flushed and every
+ * entry's delayed work is cancelled before the workqueue is destroyed;
+ * otherwise a delayed work item still waiting on its 60 second timer could
+ * be queued on a workqueue that no longer exists.  Finally the debugfs tree
+ * is removed and all cached MRs are destroyed.
+ *
+ * Always returns 0.
+ */
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+{
+       int i;
+
+       dev->cache.stopped = 1;
+       flush_workqueue(dev->cache.wq);
+
+       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+               cancel_delayed_work_sync(&dev->cache.ent[i].dwork);
+
+       destroy_workqueue(dev->cache.wq);
+
+       mlx5_mr_cache_debugfs_cleanup(dev);
+
+       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+               clean_keys(dev, i);
+
+       return 0;
+}
+
+/*
+ * mlx5_ib_get_dma_mr - create a DMA MR on @pd with access rights @acc.
+ *
+ * The mkey is created in physical-address access mode with start address 0
+ * and the MLX5_MKEY_LEN64 flag.  The resulting key is used for both lkey
+ * and rkey.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() carrying -ENOMEM or the error of
+ * mlx5_core_create_mkey().
+ */
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_ib_mr *mr;
+       int err;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       in->seg.flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
+       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
+       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       in->seg.start_addr = 0;
+
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+       /* the mailbox is not needed any more, whatever the outcome */
+       kfree(in);
+       if (err)
+               goto err_free;
+
+       mr->ibmr.lkey = mr->mmr.key;
+       mr->ibmr.rkey = mr->mmr.key;
+       mr->umem = NULL;
+
+       return &mr->ibmr;
+
+err_free:
+       kfree(mr);
+
+       return ERR_PTR(err);
+}
+
+/*
+ * Number of 16-byte units ("octowords") needed for the translation entries
+ * of a region: the page count covering [@addr, @addr + @len) at @page_size
+ * granularity, halved and rounded up.
+ */
+static int get_octo_len(u64 addr, u64 len, int page_size)
+{
+       u64 in_page = addr & (page_size - 1);
+       int npages = ALIGN(len + in_page, page_size) >> ilog2(page_size);
+
+       /* two translation entries per unit, rounded up */
+       return (npages + 1) / 2;
+}
+
+/* Regions of order 17 or less are registered through the UMR path. */
+static int use_umr(int order)
+{
+       if (order > 17)
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Fill @wr and @sg for a UMR registration work request.
+ *
+ * @sg describes the page-address buffer at DMA address @dma, with its
+ * length rounded up to 64 bytes; it is only attached to the request when
+ * @n is non-zero.  The registration parameters travel in the fast_reg part
+ * of the work request, and @pd is passed along in its page_list pointer.
+ */
+static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
+                            struct ib_sge *sg, u64 dma, int n, u32 key,
+                            int page_shift, u64 virt_addr, u64 len,
+                            int access_flags)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+       /* scatter entry covering the page-address buffer */
+       sg->lkey = dev->umrc.mr->lkey;
+       sg->length = ALIGN(sizeof(u64) * n, 64);
+       sg->addr = dma;
+
+       /* generic part of the work request */
+       wr->opcode = MLX5_IB_WR_UMR;
+       wr->send_flags = 0;
+       wr->next = NULL;
+       wr->sg_list = sg;
+       wr->num_sge = n ? 1 : 0;
+
+       /* registration parameters */
+       wr->wr.fast_reg.rkey = key;
+       wr->wr.fast_reg.iova_start = virt_addr;
+       wr->wr.fast_reg.length = len;
+       wr->wr.fast_reg.page_shift = page_shift;
+       wr->wr.fast_reg.page_list_len = n;
+       wr->wr.fast_reg.access_flags = access_flags;
+       wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+}
+
+/*
+ * Fill @wr for a UMR work request that unregisters the mkey @key.  Only the
+ * opcode, the send flags and the rkey are written; @dev is not used.
+ */
+static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
+                              struct ib_send_wr *wr, u32 key)
+{
+       wr->opcode = MLX5_IB_WR_UMR;
+       wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+       wr->wr.fast_reg.rkey = key;
+}
+
+/*
+ * Completion handler of the UMR CQ.
+ *
+ * Drains the CQ one completion at a time.  The wr_id of each completion is
+ * the address of the MR that posted the request: its status is recorded and
+ * its waiter is woken.  Once the CQ is empty it is re-armed for the next
+ * completion; after a polling error it is left un-armed.
+ */
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
+{
+       struct mlx5_ib_mr *mr;
+       struct ib_wc wc;
+       int polled;
+
+       while ((polled = ib_poll_cq(cq, 1, &wc)) > 0) {
+               mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
+               mr->status = wc.status;
+               complete(&mr->done);
+       }
+
+       if (polled < 0) {
+               pr_warn("poll cq error %d\n", polled);
+               return;
+       }
+
+       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+}
+
+/*
+ * reg_umr - register a user memory region through a cached, UMR-enabled
+ * mkey.
+ *
+ * Takes an MR from the cache (making up to 10 attempts, adding one key to
+ * the matching cache entry after each miss), writes the page addresses of
+ * @umem into the MR's aligned buffer and posts a UMR work request on the
+ * UMR QP, then sleeps until mlx5_umr_cq_handler() reports its completion.
+ *
+ * Returns the MR on success, ERR_PTR(-EAGAIN) when no cached MR could be
+ * obtained, ERR_PTR(-EFAULT) when the work request completed with an error
+ * status, or the ib_post_send() error.  On the latter two failures the MR
+ * is handed back to the cache.
+ */
+static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+                                 u64 virt_addr, u64 len, int npages,
+                                 int page_shift, int order, int access_flags)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct umr_common *umrc = &dev->umrc;
+       struct ib_send_wr wr, *bad;
+       struct mlx5_ib_mr *mr;
+       struct ib_sge sg;
+       int err;
+       int i;
+
+       /* retry the cache, refilling it by one key after every miss */
+       for (i = 0; i < 10; i++) {
+               mr = alloc_cached_mr(dev, order);
+               if (mr)
+                       break;
+
+               err = add_keys(dev, order2idx(dev, order), 1);
+               if (err) {
+                       mlx5_ib_warn(dev, "add_keys failed\n");
+                       break;
+               }
+       }
+
+       /* the add_keys() error is not propagated; callers only see -EAGAIN */
+       if (!mr)
+               return ERR_PTR(-EAGAIN);
+
+       mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
+
+       memset(&wr, 0, sizeof(wr));
+       /* the completion handler recovers the MR from wr_id */
+       wr.wr_id = (u64)(unsigned long)mr;
+       prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+
+       /* We serialize polls so one process does not kidnap another's
+        * completion. This is not a problem since wr is completed in
+        * around 1 usec
+        */
+       down(&umrc->sem);
+       init_completion(&mr->done);
+       err = ib_post_send(umrc->qp, &wr, &bad);
+       if (err) {
+               mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+               up(&umrc->sem);
+               goto error;
+       }
+       wait_for_completion(&mr->done);
+       up(&umrc->sem);
+
+       /* status was stored by mlx5_umr_cq_handler() */
+       if (mr->status != IB_WC_SUCCESS) {
+               mlx5_ib_warn(dev, "reg umr failed\n");
+               err = -EFAULT;
+               goto error;
+       }
+
+       return mr;
+
+error:
+       free_cached_mr(dev, mr);
+       return ERR_PTR(err);
+}
+
+/*
+ * reg_create - register a user memory region with a freshly created mkey.
+ *
+ * Builds a create-mkey mailbox that carries the page addresses of @umem
+ * inline (the entry count is rounded up to an even number) and issues the
+ * command.
+ *
+ * Returns the new MR, or an ERR_PTR() carrying -ENOMEM or the error of
+ * mlx5_core_create_mkey().
+ */
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
+                                    u64 length, struct ib_umem *umem,
+                                    int npages, int page_shift,
+                                    int access_flags)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_ib_mr *mr;
+       int octo_len;
+       int inlen;
+       int err;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               kfree(mr);
+               return ERR_PTR(-ENOMEM);
+       }
+       mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+
+       /* same value goes into the segment and the mailbox header */
+       octo_len = get_octo_len(virt_addr, length, 1 << page_shift);
+
+       in->seg.flags = convert_access(access_flags) |
+               MLX5_ACCESS_MODE_MTT;
+       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+       in->seg.start_addr = cpu_to_be64(virt_addr);
+       in->seg.len = cpu_to_be64(length);
+       in->seg.bsfs_octo_size = 0;
+       in->seg.xlt_oct_size = cpu_to_be32(octo_len);
+       in->seg.log2_page_size = page_shift;
+       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       in->xlat_oct_act_size = cpu_to_be32(octo_len);
+
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+       /* the mailbox is not needed any more, whatever the outcome */
+       mlx5_vfree(in);
+       if (err) {
+               mlx5_ib_warn(dev, "create mkey failed\n");
+               kfree(mr);
+               return ERR_PTR(err);
+       }
+
+       mr->umem = umem;
+
+       mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
+
+       return mr;
+}
+
+/*
+ * mlx5_ib_reg_user_mr - register a user memory region on @pd.
+ *
+ * Pins the user range [@start, @start + @length), works out its page
+ * layout and registers it: through a cached UMR mkey when use_umr()
+ * accepts the order, otherwise - or when the cache had no MR to offer
+ * (-EAGAIN) - through a freshly created mkey.
+ *
+ * On success the number of registered pages is added to the device-wide
+ * counter and the same key is used for lkey and rkey.
+ *
+ * Returns the new ib_mr or an ERR_PTR(): the ib_umem_get() error, -EINVAL
+ * for a region without pages, or the error of the registration path.  The
+ * umem is released on every failure after it was obtained.
+ */
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                 u64 virt_addr, int access_flags,
+                                 struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_mr *mr = NULL;
+       struct ib_umem *umem;
+       int page_shift;
+       int npages;
+       int ncont;
+       int order;
+       int err;
+
+       mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
+                   start, virt_addr, length);
+       umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
+                          0);
+       if (IS_ERR(umem)) {
+               mlx5_ib_dbg(dev, "umem get failed\n");
+               /* forward the error code with the proper pointer type */
+               return ERR_CAST(umem);
+       }
+
+       mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
+       if (!npages) {
+               mlx5_ib_warn(dev, "avoid zero region\n");
+               err = -EINVAL;
+               goto error;
+       }
+
+       mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+                   npages, ncont, order, page_shift);
+
+       if (use_umr(order)) {
+               mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
+                            order, access_flags);
+               /* an empty cache is not fatal: fall back to reg_create() */
+               if (PTR_ERR(mr) == -EAGAIN) {
+                       mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
+                       mr = NULL;
+               }
+       }
+
+       if (!mr)
+               mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
+                               access_flags);
+
+       if (IS_ERR(mr)) {
+               err = PTR_ERR(mr);
+               goto error;
+       }
+
+       mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
+
+       mr->umem = umem;
+       mr->npages = npages;
+       spin_lock(&dev->mr_lock);
+       dev->mdev.priv.reg_pages += npages;
+       spin_unlock(&dev->mr_lock);
+       mr->ibmr.lkey = mr->mmr.key;
+       mr->ibmr.rkey = mr->mmr.key;
+
+       return &mr->ibmr;
+
+error:
+       ib_umem_release(umem);
+       return ERR_PTR(err);
+}
+
+/*
+ * unreg_umr - invalidate the mkey of @mr through a UMR work request.
+ *
+ * Posts the request on the UMR QP while holding the UMR semaphore and
+ * sleeps until mlx5_umr_cq_handler() signals its completion.
+ *
+ * Returns 0 on success, the ib_post_send() error, or -EFAULT when the
+ * request completed with an error status.
+ */
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+       struct umr_common *umrc = &dev->umrc;
+       struct ib_send_wr wr, *bad;
+       int err;
+
+       memset(&wr, 0, sizeof(wr));
+       /* the completion handler recovers the MR from wr_id */
+       wr.wr_id = (u64)(unsigned long)mr;
+       prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+
+       down(&umrc->sem);
+       init_completion(&mr->done);
+       err = ib_post_send(umrc->qp, &wr, &bad);
+       if (err) {
+               up(&umrc->sem);
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               return err;
+       }
+       wait_for_completion(&mr->done);
+       up(&umrc->sem);
+
+       if (mr->status == IB_WC_SUCCESS)
+               return 0;
+
+       mlx5_ib_warn(dev, "unreg umr failed\n");
+       return -EFAULT;
+}
+
+/*
+ * mlx5_ib_dereg_mr - deregister a memory region.
+ *
+ * An MR that did not come from the cache (umred == 0) has its mkey
+ * destroyed and is freed at the end.  A cached MR (umred != 0) is
+ * invalidated through a UMR request and handed back to the cache instead
+ * of being freed.
+ *
+ * When the MR has a umem, it is released and the device-wide registered
+ * page counter is decreased.
+ *
+ * Returns 0 on success, or the error of the destroy/unregister step, in
+ * which case nothing is released.
+ */
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       /*
+        * Snapshot the fields needed below: once a cached MR has been given
+        * back via free_cached_mr() it must not be dereferenced any more.
+        */
+       struct ib_umem *umem = mr->umem;
+       int npages = mr->npages;
+       int umred = mr->umred;
+       int err;
+
+       if (!umred) {
+               err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+               if (err) {
+                       mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+                                    mr->mmr.key, err);
+                       return err;
+               }
+       } else {
+               err = unreg_umr(dev, mr);
+               if (err) {
+                       mlx5_ib_warn(dev, "failed unregister\n");
+                       return err;
+               }
+               free_cached_mr(dev, mr);
+       }
+
+       if (umem) {
+               ib_umem_release(umem);
+               spin_lock(&dev->mr_lock);
+               dev->mdev.priv.reg_pages -= npages;
+               spin_unlock(&dev->mr_lock);
+       }
+
+       /* cached MRs stay allocated; they now belong to the cache again */
+       if (!umred)
+               kfree(mr);
+
+       return 0;
+}
+
+/*
+ * mlx5_ib_alloc_fast_reg_mr - allocate a fast-registration MR on @pd able
+ * to hold up to @max_page_list_len pages.
+ *
+ * The mkey is created in the free state, UMR-enabled and in MTT access
+ * mode.  The resulting key is used for both lkey and rkey.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() carrying -ENOMEM or the error of
+ * mlx5_core_create_mkey().
+ */
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+                                       int max_page_list_len)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_ib_mr *mr;
+       int err;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in) {
+               kfree(mr);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
+       in->seg.status = 1 << 6; /* free */
+       /* TBD: not needed - issue 197292 */
+       in->seg.log2_page_size = PAGE_SHIFT;
+
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+       kfree(in);
+       if (err) {
+               kfree(mr);
+               return ERR_PTR(err);
+       }
+
+       mr->ibmr.lkey = mr->mmr.key;
+       mr->ibmr.rkey = mr->mmr.key;
+       mr->umem = NULL;
+
+       return &mr->ibmr;
+}
+
+/*
+ * mlx5_ib_alloc_fast_reg_page_list - allocate a page list with room for
+ * @page_list_len entries.
+ *
+ * Two arrays of that size are allocated: the caller-visible page_list and
+ * a DMA-coherent copy (mapped_page_list).  A warning is raised when the
+ * DMA address of the latter is not 64-byte aligned.
+ *
+ * Returns the page list or ERR_PTR(-ENOMEM).
+ */
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+                                                              int page_list_len)
+{
+       struct mlx5_ib_fast_reg_page_list *mfrpl;
+       int bytes = page_list_len * sizeof(u64);
+
+       mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
+       if (!mfrpl)
+               return ERR_PTR(-ENOMEM);
+
+       mfrpl->ibfrpl.page_list = kmalloc(bytes, GFP_KERNEL);
+       if (!mfrpl->ibfrpl.page_list)
+               goto free_mfrpl;
+
+       mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
+                                                    bytes, &mfrpl->map,
+                                                    GFP_KERNEL);
+       if (!mfrpl->mapped_page_list)
+               goto free_list;
+
+       WARN_ON(mfrpl->map & 0x3f);
+
+       return &mfrpl->ibfrpl;
+
+free_list:
+       kfree(mfrpl->ibfrpl.page_list);
+free_mfrpl:
+       kfree(mfrpl);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * mlx5_ib_free_fast_reg_page_list - release a page list: the DMA-coherent
+ * copy (sized from max_page_list_len), the page_list array and the
+ * container itself.
+ */
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+       struct mlx5_ib_dev *dev = to_mdev(page_list->device);
+       struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+       int bytes = page_list->max_page_list_len * sizeof(u64);
+
+       dma_free_coherent(&dev->mdev.pdev->dev, bytes,
+                         mfrpl->mapped_page_list, mfrpl->map);
+       kfree(mfrpl->ibfrpl.page_list);
+       kfree(mfrpl);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644 (file)
index 0000000..16ac54c
--- /dev/null
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+/*
+ * not supported currently
+ *
+ * create_qp_common() copies this flag into qp->wq_sig for QPs that are not
+ * created on behalf of a user context.  It is a zero-initialized static
+ * that nothing in this part of the file assigns, so WQE signatures stay
+ * disabled for those QPs.
+ */
+static int wq_signature;
+
+/*
+ * Not referenced in this part of the file; presumably how often an ACK is
+ * requested on the send path - TODO confirm against the post-send code.
+ */
+enum {
+       MLX5_IB_ACK_REQ_FREQ    = 8,
+};
+
+/*
+ * Default scheduling-queue values and link-type codes.  Their users are
+ * outside this part of the file.
+ */
+enum {
+       MLX5_IB_DEFAULT_SCHED_QUEUE     = 0x83,
+       MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+       MLX5_IB_LINK_TYPE_IB            = 0,
+       MLX5_IB_LINK_TYPE_ETH           = 1
+};
+
+/*
+ * MLX5_IB_SQ_STRIDE is the shift mlx5_get_send_wqe() applies to a send WQE
+ * index, i.e. send WQEs are addressed in units of 1 << 6 = 64 bytes.
+ */
+enum {
+       MLX5_IB_SQ_STRIDE       = 6,
+       MLX5_IB_CACHE_LINE_SIZE = 64,
+};
+
+/*
+ * Translation table indexed by work-request opcode (IB_WR_* plus the
+ * driver-private MLX5_IB_WR_UMR) giving the corresponding MLX5_OPCODE_*
+ * value.  Local invalidate, fast-register and the private UMR request all
+ * map to MLX5_OPCODE_UMR.  Indices without an initializer are zero.
+ */
+static const u32 mlx5_ib_opcode[] = {
+       [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
+       [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
+       [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
+       [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
+       [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
+       [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
+       [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
+       [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
+       [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
+       [IB_WR_FAST_REG_MR]                     = MLX5_OPCODE_UMR,
+       [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
+       [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
+       [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
+};
+
+/*
+ * Parameter bundle for a UMR work request.  No user of this structure is
+ * visible in this part of the file; the field meanings below are inferred
+ * from their names only - TODO confirm against the code that fills it in.
+ */
+struct umr_wr {
+       /* presumably the start virtual address of the region */
+       u64                             virt_addr;
+       /* protection domain the key belongs to */
+       struct ib_pd                   *pd;
+       /* presumably log2 of the page size used for the translation */
+       unsigned int                    page_shift;
+       /* presumably the number of pages described */
+       unsigned int                    npages;
+       /* presumably the region length in bytes */
+       u32                             length;
+       /* presumably IB access flags for the region */
+       int                             access_flags;
+       /* memory key being operated on */
+       u32                             mkey;
+};
+
+/* Returns 1 when qp_type is IB_QPT_SMI (the QP0 type), 0 otherwise. */
+static int is_qp0(enum ib_qp_type qp_type)
+{
+       return (IB_QPT_SMI == qp_type) ? 1 : 0;
+}
+
+/* Returns 1 when qp_type is IB_QPT_GSI (the QP1 type), 0 otherwise. */
+static int is_qp1(enum ib_qp_type qp_type)
+{
+       return (IB_QPT_GSI == qp_type) ? 1 : 0;
+}
+
+/* Returns 1 for either special QP type (SMI or GSI), 0 otherwise. */
+static int is_sqp(enum ib_qp_type qp_type)
+{
+       if (is_qp0(qp_type))
+               return 1;
+
+       return is_qp1(qp_type);
+}
+
+/* Translate a byte offset within the QP work-queue buffer to an address. */
+static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
+{
+       void *wqe = mlx5_buf_offset(&qp->buf, offset);
+
+       return wqe;
+}
+
+/*
+ * Address of receive WQE number n: the RQ starts at rq.offset inside the
+ * QP buffer and each entry occupies 1 << rq.wqe_shift bytes.
+ */
+static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
+{
+       int byte_off = qp->rq.offset + (n << qp->rq.wqe_shift);
+
+       return get_wqe(qp, byte_off);
+}
+
+/*
+ * Address of send WQE slot n: the SQ starts at sq.offset inside the QP
+ * buffer and slots are 1 << MLX5_IB_SQ_STRIDE bytes apart.
+ */
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
+{
+       int byte_off = qp->sq.offset + (n << MLX5_IB_SQ_STRIDE);
+
+       return get_wqe(qp, byte_off);
+}
+
+/*
+ * Asynchronous event callback installed on the core QP (see
+ * create_qp_common()).  Translates the MLX5_EVENT_TYPE_* code into the
+ * matching IB_EVENT_* value and forwards it to the consumer's event
+ * handler, if one is registered.  Unknown event types are logged and
+ * dropped.
+ */
+static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
+{
+       struct mlx5_ib_qp *mqp = to_mibqp(qp);
+       struct ib_qp *ibqp = &mqp->ibqp;
+       struct ib_event event;
+
+       /* after a path migration the alternate port becomes the current one */
+       if (type == MLX5_EVENT_TYPE_PATH_MIG)
+               mqp->port = mqp->alt_port;
+
+       /* nobody to notify */
+       if (!ibqp->event_handler)
+               return;
+
+       switch (type) {
+       case MLX5_EVENT_TYPE_PATH_MIG:
+               event.event = IB_EVENT_PATH_MIG;
+               break;
+       case MLX5_EVENT_TYPE_COMM_EST:
+               event.event = IB_EVENT_COMM_EST;
+               break;
+       case MLX5_EVENT_TYPE_SQ_DRAINED:
+               event.event = IB_EVENT_SQ_DRAINED;
+               break;
+       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+               event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+               break;
+       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+               event.event = IB_EVENT_QP_FATAL;
+               break;
+       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+               event.event = IB_EVENT_PATH_MIG_ERR;
+               break;
+       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+               event.event = IB_EVENT_QP_REQ_ERR;
+               break;
+       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+               event.event = IB_EVENT_QP_ACCESS_ERR;
+               break;
+       default:
+               pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
+               return;
+       }
+
+       event.device     = ibqp->device;
+       event.element.qp = ibqp;
+       ibqp->event_handler(&event, ibqp->qp_context);
+}
+
+/*
+ * Fill in the receive-queue geometry of @qp (wqe_cnt, wqe_shift, max_gs,
+ * max_post).
+ *
+ * @dev:    device whose capability limits are enforced
+ * @cap:    requested capabilities (max_recv_wr, max_recv_sge)
+ * @has_rq: zero if the QP owns no receive queue; the RQ fields are cleared
+ * @qp:     QP being set up; qp->wq_sig must already be valid
+ * @ucmd:   user-space creation command, or NULL for a kernel QP.  When
+ *          present the geometry is taken from it instead of being computed.
+ *
+ * Returns 0 on success or -EINVAL if a request exceeds device limits or
+ * the user-supplied stride is unusable.
+ */
+static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
+                      int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
+{
+       int wqe_size;
+       int wq_size;
+
+       /* Sanity check RQ size before proceeding */
+       if (cap->max_recv_wr  > dev->mdev.caps.max_wqes)
+               return -EINVAL;
+
+       if (!has_rq) {
+               qp->rq.max_gs = 0;
+               qp->rq.wqe_cnt = 0;
+               qp->rq.wqe_shift = 0;
+               return 0;
+       }
+
+       if (ucmd) {
+               /*
+                * The shift comes straight from user space and is used as
+                * "1 << shift" on an int here and by later callers; reject
+                * any value for which that is not a positive int.
+                */
+               if (ucmd->rq_wqe_shift > 8 * sizeof(int) - 2)
+                       return -EINVAL;
+
+               /*
+                * With signatures enabled one data-segment slot is
+                * subtracted from max_gs; make sure the stride has room
+                * for it so max_gs cannot become negative.
+                */
+               if ((1 << ucmd->rq_wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig)
+                       return -EINVAL;
+
+               qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+               qp->rq.wqe_shift = ucmd->rq_wqe_shift;
+               qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+               qp->rq.max_post = qp->rq.wqe_cnt;
+               return 0;
+       }
+
+       /* kernel QP: derive a power-of-two stride from the requested SGEs */
+       wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+       wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
+       wqe_size = roundup_pow_of_two(wqe_size);
+       wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
+       /* the queue is never smaller than one send basic block */
+       wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
+       qp->rq.wqe_cnt = wq_size / wqe_size;
+       if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
+               mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
+                           wqe_size,
+                           dev->mdev.caps.max_rq_desc_sz);
+               return -EINVAL;
+       }
+       qp->rq.wqe_shift = ilog2(wqe_size);
+       qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+       qp->rq.max_post = qp->rq.wqe_cnt;
+
+       return 0;
+}
+
+/*
+ * Number of bytes of fixed (non-data) segments a send WQE may need for the
+ * given QP type, i.e. everything that precedes the data or inline
+ * segments.
+ *
+ * Returns the overhead in bytes, or -EINVAL for a QP type that has no
+ * entry here.
+ */
+static int sq_overhead(enum ib_qp_type qp_type)
+{
+       /*
+        * Must start at zero: the RC case accumulates with "+=" and is
+        * entered directly (not only via the XRC_INI fall-through), so an
+        * uninitialized value would otherwise be read.
+        */
+       int size = 0;
+
+       switch (qp_type) {
+       case IB_QPT_XRC_INI:
+               size += sizeof(struct mlx5_wqe_xrc_seg);
+               /* fall through */
+       case IB_QPT_RC:
+               size += sizeof(struct mlx5_wqe_ctrl_seg) +
+                       sizeof(struct mlx5_wqe_atomic_seg) +
+                       sizeof(struct mlx5_wqe_raddr_seg);
+               break;
+
+       case IB_QPT_UC:
+               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+                       sizeof(struct mlx5_wqe_raddr_seg);
+               break;
+
+       case IB_QPT_UD:
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+                       sizeof(struct mlx5_wqe_datagram_seg);
+               break;
+
+       case MLX5_IB_QPT_REG_UMR:
+               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+                       sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+                       sizeof(struct mlx5_mkey_seg);
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       return size;
+}
+
+/*
+ * Size in bytes of one send WQE for the requested capabilities: the fixed
+ * overhead plus the larger of the scatter/gather list and the inline-data
+ * layout, rounded up to a multiple of MLX5_SEND_WQE_BB.
+ *
+ * Returns a negative errno if the QP type is not handled by sq_overhead().
+ */
+static int calc_send_wqe(struct ib_qp_init_attr *attr)
+{
+       int overhead = sq_overhead(attr->qp_type);
+       int inline_sz = 0;
+       int sge_sz;
+
+       if (overhead < 0)
+               return overhead;
+
+       /* layout when the payload is carried inline in the WQE */
+       if (attr->cap.max_inline_data)
+               inline_sz = overhead + sizeof(struct mlx5_wqe_inline_seg) +
+                       attr->cap.max_inline_data;
+
+       /* layout when the payload is described by data segments */
+       sge_sz = overhead +
+               attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
+
+       return ALIGN(max_t(int, inline_sz, sge_sz), MLX5_SEND_WQE_BB);
+}
+
+/*
+ * Compute the send-queue geometry of a kernel QP and store it in qp->sq
+ * (wqe_cnt, wqe_shift, max_gs, max_post) and qp->max_inline_data.  The
+ * inline-data limit actually granted is also written back to
+ * attr->cap.max_inline_data.
+ *
+ * Returns the SQ size in bytes (0 when no send WRs were requested) or a
+ * negative errno.
+ */
+static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+                       struct mlx5_ib_qp *qp)
+{
+       int desc_sz;
+       int sq_bytes;
+
+       if (attr->cap.max_send_wr == 0)
+               return 0;
+
+       desc_sz = calc_send_wqe(attr);
+       mlx5_ib_dbg(dev, "wqe_size %d\n", desc_sz);
+       if (desc_sz < 0)
+               return desc_sz;
+
+       if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
+               mlx5_ib_dbg(dev, "\n");
+               return -EINVAL;
+       }
+
+       /* whatever is left of the WQE after the fixed segments can go inline */
+       qp->max_inline_data = desc_sz - sq_overhead(attr->qp_type) -
+               sizeof(struct mlx5_wqe_inline_seg);
+       attr->cap.max_inline_data = qp->max_inline_data;
+
+       sq_bytes = roundup_pow_of_two(attr->cap.max_send_wr * desc_sz);
+
+       /* the SQ is counted in basic blocks, not in whole WQEs */
+       qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+       qp->sq.wqe_cnt = sq_bytes / MLX5_SEND_WQE_BB;
+       qp->sq.max_post = 1 << ilog2(sq_bytes / desc_sz);
+       qp->sq.max_gs = attr->cap.max_send_sge;
+
+       return sq_bytes;
+}
+
+/*
+ * Validate the send-queue parameters supplied by user space and compute
+ * the total work-queue buffer size (RQ followed by SQ) into qp->buf_size.
+ * The RQ geometry in qp->rq must already be set.
+ *
+ * Returns 0 on success, -EINVAL if the descriptor size or WQE count
+ * exceeds the device limits or the count is not a power of two.
+ */
+static int set_user_buf_size(struct mlx5_ib_dev *dev,
+                           struct mlx5_ib_qp *qp,
+                           struct mlx5_ib_create_qp *ucmd)
+{
+       int sq_desc_sz = 1 << qp->sq.wqe_shift;
+
+       if (sq_desc_sz > dev->mdev.caps.max_sq_desc_sz) {
+               mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
+                            sq_desc_sz, dev->mdev.caps.max_sq_desc_sz);
+               return -EINVAL;
+       }
+
+       /* a non-zero count must be an exact power of two */
+       if (ucmd->sq_wqe_count &&
+           ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
+               mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n",
+                            ucmd->sq_wqe_count, ucmd->sq_wqe_count);
+               return -EINVAL;
+       }
+
+       qp->sq.wqe_cnt = ucmd->sq_wqe_count;
+
+       if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+               mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
+                            qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+               return -EINVAL;
+       }
+
+       /* RQ bytes plus SQ bytes; SQ slots are 1 << MLX5_IB_SQ_STRIDE each */
+       qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+               (qp->sq.wqe_cnt << MLX5_IB_SQ_STRIDE);
+
+       return 0;
+}
+
+/*
+ * Returns 1 if the QP described by attr owns a receive queue, 0 if not.
+ * There is none when an SRQ is attached, when no receive WRs were
+ * requested, or for the XRC initiator/target and UMR QP types.
+ */
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+       if (attr->srq || !attr->cap.max_recv_wr)
+               return 0;
+
+       switch (attr->qp_type) {
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:
+       case MLX5_IB_QPT_REG_UMR:
+               return 0;
+       default:
+               return 1;
+       }
+}
+
+/*
+ * Claim an unused UUAR from the last num_low_latency_uuars entries of the
+ * table.  The entry is marked in the bitmap and its use count bumped.
+ * Called from alloc_uuar() with uuari->lock held.
+ *
+ * Returns the UUAR index, or -ENOMEM if every entry in that range is
+ * already taken.
+ */
+static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+{
+       int total = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+       int idx;
+
+       for (idx = total - uuari->num_low_latency_uuars; idx < total; idx++) {
+               if (test_bit(idx, uuari->bitmap))
+                       continue;
+
+               set_bit(idx, uuari->bitmap);
+               uuari->count[idx]++;
+               return idx;
+       }
+
+       return -ENOMEM;
+}
+
+/*
+ * Pick the least-used UUAR among the shareable entries below the
+ * exclusively-allocated range and bump its use count.  Index 0 is never
+ * considered, and neither are entries whose index modulo 4 is 2 or 3.
+ * Ties go to the lowest index.  Called from alloc_uuar() with uuari->lock
+ * held.
+ *
+ * Always returns a valid index (1 if no better candidate exists).
+ */
+static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+{
+       int limit = uuari->num_uars * MLX5_BF_REGS_PER_PAGE -
+               uuari->num_low_latency_uuars;
+       int best = 1;
+       int idx;
+
+       for (idx = 1; idx < limit; idx++) {
+               /* skip the third and fourth register of each group of four */
+               if ((idx & 3) >= 2)
+                       continue;
+
+               if (uuari->count[idx] < uuari->count[best])
+                       best = idx;
+       }
+
+       uuari->count[best]++;
+       return best;
+}
+
+/*
+ * Allocate a UUAR index for the requested latency class, under
+ * uuari->lock:
+ *   LOW       - always index 0, with its use count incremented
+ *   MEDIUM    - least-used shareable entry (alloc_med_class_uuar())
+ *   HIGH      - an exclusive entry (alloc_high_class_uuar())
+ *   FAST_PATH - always index 2; no use count is taken here
+ *
+ * Returns the index, or a negative errno (-EINVAL for an unknown class,
+ * or whatever the class allocator reports).
+ */
+static int alloc_uuar(struct mlx5_uuar_info *uuari,
+                     enum mlx5_ib_latency_class lat)
+{
+       int uuarn;
+
+       mutex_lock(&uuari->lock);
+       if (lat == MLX5_IB_LATENCY_CLASS_LOW) {
+               uuarn = 0;
+               uuari->count[uuarn]++;
+       } else if (lat == MLX5_IB_LATENCY_CLASS_MEDIUM) {
+               uuarn = alloc_med_class_uuar(uuari);
+       } else if (lat == MLX5_IB_LATENCY_CLASS_HIGH) {
+               uuarn = alloc_high_class_uuar(uuari);
+       } else if (lat == MLX5_IB_LATENCY_CLASS_FAST_PATH) {
+               uuarn = 2;
+       } else {
+               uuarn = -EINVAL;
+       }
+       mutex_unlock(&uuari->lock);
+
+       return uuarn;
+}
+
+/*
+ * Drop one reference on a shareable UUAR and clear its bitmap bit.
+ * Called from free_uuar() with uuari->lock held.
+ */
+static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+       uuari->count[uuarn]--;
+       clear_bit(uuarn, uuari->bitmap);
+}
+
+/*
+ * Return an exclusively-allocated UUAR: drop its use count and clear the
+ * bitmap bit set by alloc_high_class_uuar().  Called from free_uuar() with
+ * uuari->lock held.
+ */
+static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+       uuari->count[uuarn]--;
+       clear_bit(uuarn, uuari->bitmap);
+}
+
+/*
+ * Release a UUAR obtained from alloc_uuar().  The class is inferred from
+ * the index: 0 only has its use count dropped, indices below the
+ * exclusive range go through free_med_class_uuar(), the rest through
+ * free_high_class_uuar().  Takes uuari->lock.
+ */
+static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+       int total = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+       int first_high = total - uuari->num_low_latency_uuars;
+
+       mutex_lock(&uuari->lock);
+       if (uuarn == 0)
+               --uuari->count[uuarn];
+       else if (uuarn < first_high)
+               free_med_class_uuar(uuari, uuarn);
+       else
+               free_high_class_uuar(uuari, uuarn);
+       mutex_unlock(&uuari->lock);
+}
+
+/*
+ * Map an IB QP state to the corresponding MLX5_QP_STATE_* value.  States
+ * without a mapping yield -1.
+ */
+static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
+{
+       enum mlx5_qp_state mlx5_state;
+
+       switch (state) {
+       case IB_QPS_RESET:
+               mlx5_state = MLX5_QP_STATE_RST;
+               break;
+       case IB_QPS_INIT:
+               mlx5_state = MLX5_QP_STATE_INIT;
+               break;
+       case IB_QPS_RTR:
+               mlx5_state = MLX5_QP_STATE_RTR;
+               break;
+       case IB_QPS_RTS:
+               mlx5_state = MLX5_QP_STATE_RTS;
+               break;
+       case IB_QPS_SQD:
+               mlx5_state = MLX5_QP_STATE_SQD;
+               break;
+       case IB_QPS_SQE:
+               mlx5_state = MLX5_QP_STATE_SQER;
+               break;
+       case IB_QPS_ERR:
+               mlx5_state = MLX5_QP_STATE_ERR;
+               break;
+       default:
+               mlx5_state = -1;
+               break;
+       }
+
+       return mlx5_state;
+}
+
+/*
+ * Map an IB QP type to the MLX5_QP_ST_* service type.  Both XRC types
+ * share MLX5_QP_ST_XRC.  Returns -EINVAL for types without a mapping
+ * (including IB_QPT_RAW_PACKET and IB_QPT_MAX).
+ */
+static int to_mlx5_st(enum ib_qp_type type)
+{
+       int st;
+
+       switch (type) {
+       case IB_QPT_RC:
+               st = MLX5_QP_ST_RC;
+               break;
+       case IB_QPT_UC:
+               st = MLX5_QP_ST_UC;
+               break;
+       case IB_QPT_UD:
+               st = MLX5_QP_ST_UD;
+               break;
+       case MLX5_IB_QPT_REG_UMR:
+               st = MLX5_QP_ST_REG_UMR;
+               break;
+       case IB_QPT_XRC_INI:
+       case IB_QPT_XRC_TGT:
+               st = MLX5_QP_ST_XRC;
+               break;
+       case IB_QPT_SMI:
+               st = MLX5_QP_ST_QP0;
+               break;
+       case IB_QPT_GSI:
+               st = MLX5_QP_ST_QP1;
+               break;
+       case IB_QPT_RAW_IPV6:
+               st = MLX5_QP_ST_RAW_IPV6;
+               break;
+       case IB_QPT_RAW_ETHERTYPE:
+               st = MLX5_QP_ST_RAW_ETHERTYPE;
+               break;
+       case IB_QPT_RAW_PACKET:
+       case IB_QPT_MAX:
+       default:
+               st = -EINVAL;
+               break;
+       }
+
+       return st;
+}
+
+/*
+ * Return the index of the UAR that contains UUAR number uuarn; each UAR
+ * entry in uuari->uars covers MLX5_BF_REGS_PER_PAGE consecutive UUARs.
+ */
+static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+{
+       int uar_slot = uuarn / MLX5_BF_REGS_PER_PAGE;
+
+       return uuari->uars[uar_slot].index;
+}
+
+/*
+ * Set up the user-space specific part of a QP: allocate a UUAR from the
+ * caller's context, pin the user work-queue buffer, build the create-QP
+ * mailbox (including the page address list) and map the user doorbell
+ * record.
+ *
+ * @dev:   device the QP is created on
+ * @pd:    protection domain; its uobject supplies the user context
+ * @qp:    QP being created; qp->rq geometry must already be set
+ * @udata: user command/response buffers
+ * @in:    out - newly allocated create-QP mailbox, owned by the caller on
+ *         success (freed with mlx5_vfree())
+ * @resp:  response structure; uuar_index is filled in and the structure is
+ *         copied to user space
+ * @inlen: out - size in bytes of *in
+ *
+ * On success qp->create_type is MLX5_QP_USER and 0 is returned.  On
+ * failure everything acquired here is released and a negative errno is
+ * returned; *in must not be used in that case.
+ */
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+                         struct mlx5_ib_qp *qp, struct ib_udata *udata,
+                         struct mlx5_create_qp_mbox_in **in,
+                         struct mlx5_ib_create_qp_resp *resp, int *inlen)
+{
+       struct mlx5_ib_ucontext *context;
+       struct mlx5_ib_create_qp ucmd;
+       int page_shift;
+       int uar_index;
+       int npages;
+       u32 offset;
+       int uuarn;
+       int ncont;
+       int err;
+
+       err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+       if (err) {
+               mlx5_ib_dbg(dev, "copy failed\n");
+               return err;
+       }
+
+       context = to_mucontext(pd->uobject->context);
+       /*
+        * TBD: should come from the verbs when we have the API
+        *
+        * First try for an exclusive (HIGH class) UUAR and fall back to
+        * the LOW class one (index 0) if none is free.
+        * NOTE(review): the wording of the two debug messages below does
+        * not match the class names passed to alloc_uuar() - confirm which
+        * naming is intended.
+        */
+       uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
+       if (uuarn < 0) {
+               mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+               mlx5_ib_dbg(dev, "reverting to high latency\n");
+               uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+               if (uuarn < 0) {
+                       mlx5_ib_dbg(dev, "uuar allocation failed\n");
+                       return uuarn;
+               }
+       }
+
+       uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
+       mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+
+       /* validates the SQ parameters and computes qp->buf_size */
+       err = set_user_buf_size(dev, qp, &ucmd);
+       if (err)
+               goto err_uuar;
+
+       /* pin the user buffer that holds both work queues */
+       qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+                              qp->buf_size, 0, 0);
+       if (IS_ERR(qp->umem)) {
+               mlx5_ib_dbg(dev, "umem_get failed\n");
+               err = PTR_ERR(qp->umem);
+               goto err_uuar;
+       }
+
+       mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+                          &ncont, NULL);
+       err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+       if (err) {
+               mlx5_ib_warn(dev, "bad offset\n");
+               goto err_umem;
+       }
+       mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+                   ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+
+       /* mailbox is followed by one page-address entry per ncont */
+       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+       *in = mlx5_vzalloc(*inlen);
+       if (!*in) {
+               err = -ENOMEM;
+               goto err_umem;
+       }
+       mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+       /* page size is encoded relative to PAGE_SHIFT in bits 24 and up */
+       (*in)->ctx.log_pg_sz_remote_qpn =
+               cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+       (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+       resp->uuar_index = uuarn;
+       qp->uuarn = uuarn;
+
+       err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+       if (err) {
+               mlx5_ib_dbg(dev, "map failed\n");
+               goto err_free;
+       }
+
+       err = ib_copy_to_udata(udata, resp, sizeof(*resp));
+       if (err) {
+               mlx5_ib_dbg(dev, "copy failed\n");
+               goto err_unmap;
+       }
+       qp->create_type = MLX5_QP_USER;
+
+       return 0;
+
+       /* unwind in reverse order of acquisition */
+err_unmap:
+       mlx5_ib_db_unmap_user(context, &qp->db);
+
+err_free:
+       mlx5_vfree(*in);
+
+err_umem:
+       ib_umem_release(qp->umem);
+
+err_uuar:
+       free_uuar(&context->uuari, uuarn);
+       return err;
+}
+
+/*
+ * Undo create_user_qp(): unmap the user doorbell record, unpin the work
+ * queue buffer and give the UUAR back to the owning user context.
+ */
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_ucontext *uctx = to_mucontext(pd->uobject->context);
+
+       mlx5_ib_db_unmap_user(uctx, &qp->db);
+       ib_umem_release(qp->umem);
+       free_uuar(&uctx->uuari, qp->uuarn);
+}
+
+/*
+ * Set up the kernel-owned resources of a QP: a UUAR from the device-wide
+ * pool, the work-queue buffer (RQ first, SQ after it), the create-QP
+ * mailbox with the buffer's page list, a doorbell record and the
+ * per-WQE bookkeeping arrays.
+ *
+ * @dev:       device the QP is created on
+ * @init_attr: creation attributes; cap.max_inline_data may be updated by
+ *             calc_sq_size()
+ * @qp:        QP being created; qp->rq geometry must already be set
+ * @in:        out - newly allocated mailbox, owned by the caller on
+ *             success (freed with mlx5_vfree())
+ * @inlen:     out - size in bytes of *in
+ *
+ * On success qp->create_type is MLX5_QP_KERNEL and 0 is returned.  On
+ * failure everything acquired here is released and a negative errno is
+ * returned; *in must not be used in that case.
+ */
+static int create_kernel_qp(struct mlx5_ib_dev *dev,
+                           struct ib_qp_init_attr *init_attr,
+                           struct mlx5_ib_qp *qp,
+                           struct mlx5_create_qp_mbox_in **in, int *inlen)
+{
+       enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
+       struct mlx5_uuar_info *uuari;
+       int uar_index;
+       int uuarn;
+       int err;
+
+       uuari = &dev->mdev.priv.uuari;
+       if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+               qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+       /* the UMR QP gets the fixed fast-path UUAR, everything else LOW */
+       if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
+               lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
+
+       uuarn = alloc_uuar(uuari, lc);
+       if (uuarn < 0) {
+               mlx5_ib_dbg(dev, "\n");
+               return -ENOMEM;
+       }
+
+       qp->bf = &uuari->bfs[uuarn];
+       uar_index = qp->bf->uar->index;
+
+       /* on success err temporarily holds the SQ size in bytes */
+       err = calc_sq_size(dev, init_attr, qp);
+       if (err < 0) {
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               goto err_uuar;
+       }
+
+       /* buffer layout: RQ at offset 0, SQ immediately after it */
+       qp->rq.offset = 0;
+       qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+       qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+
+       err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
+       if (err) {
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               goto err_uuar;
+       }
+
+       /* address one slot past the last send WQE */
+       qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+       *in = mlx5_vzalloc(*inlen);
+       if (!*in) {
+               err = -ENOMEM;
+               goto err_buf;
+       }
+       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+       (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+       /* Set "fast registration enabled" for all kernel QPs */
+       (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+       (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+
+       mlx5_fill_page_array(&qp->buf, (*in)->pas);
+
+       err = mlx5_db_alloc(&dev->mdev, &qp->db);
+       if (err) {
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               goto err_free;
+       }
+
+       qp->db.db[0] = 0;
+       qp->db.db[1] = 0;
+
+       /*
+        * Per-WQE bookkeeping arrays.  All five are allocated before any
+        * result is checked; the error path relies on kfree(NULL) being a
+        * no-op for the ones that failed.
+        */
+       qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
+       qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
+       qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
+       qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
+       qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
+
+       if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
+           !qp->sq.w_list || !qp->sq.wqe_head) {
+               err = -ENOMEM;
+               goto err_wrid;
+       }
+       qp->create_type = MLX5_QP_KERNEL;
+
+       return 0;
+
+       /* unwind in reverse order of acquisition */
+err_wrid:
+       mlx5_db_free(&dev->mdev, &qp->db);
+       kfree(qp->sq.wqe_head);
+       kfree(qp->sq.w_list);
+       kfree(qp->sq.wrid);
+       kfree(qp->sq.wr_data);
+       kfree(qp->rq.wrid);
+
+err_free:
+       mlx5_vfree(*in);
+
+err_buf:
+       mlx5_buf_free(&dev->mdev, &qp->buf);
+
+err_uuar:
+       free_uuar(&dev->mdev.priv.uuari, uuarn);
+       return err;
+}
+
+/*
+ * Undo create_kernel_qp(): free the doorbell record, the per-WQE
+ * bookkeeping arrays and the work-queue buffer, then return the UUAR to
+ * the device-wide pool.
+ */
+static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+       mlx5_db_free(&dev->mdev, &qp->db);
+
+       /* send-side bookkeeping */
+       kfree(qp->sq.wrid);
+       kfree(qp->sq.wr_data);
+       kfree(qp->sq.w_list);
+       kfree(qp->sq.wqe_head);
+
+       /* receive-side bookkeeping */
+       kfree(qp->rq.wrid);
+
+       mlx5_buf_free(&dev->mdev, &qp->buf);
+       free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
+}
+
+/*
+ * Receive-queue type for the QP context, in big-endian form:
+ * MLX5_SRQ_RQ when an SRQ is attached or the QP is an XRC type,
+ * MLX5_ZERO_LEN_RQ when the QP has no receive queue, and
+ * MLX5_NON_ZERO_RQ otherwise.
+ */
+static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+{
+       u32 rx_type;
+
+       if (attr->srq ||
+           attr->qp_type == IB_QPT_XRC_TGT ||
+           attr->qp_type == IB_QPT_XRC_INI)
+               rx_type = MLX5_SRQ_RQ;
+       else if (qp->has_rq)
+               rx_type = MLX5_NON_ZERO_RQ;
+       else
+               rx_type = MLX5_ZERO_LEN_RQ;
+
+       return cpu_to_be32(rx_type);
+}
+
+/* Returns 1 for the RC and UC QP types, 0 for every other type. */
+static int is_connected(enum ib_qp_type qp_type)
+{
+       switch (qp_type) {
+       case IB_QPT_RC:
+       case IB_QPT_UC:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Common QP creation path.  Initializes the locks in @qp, sizes the
+ * receive queue, builds the user- or kernel-specific resources (or none at
+ * all when @pd is NULL), fills in the QP context in the create mailbox and
+ * issues the create command to the core driver.
+ *
+ * @dev:       device the QP is created on
+ * @pd:        protection domain, or NULL for a QP without work queues of
+ *             its own (create_type MLX5_QP_EMPTY); devr->p0 supplies the
+ *             PD number in that case
+ * @init_attr: creation attributes from the verbs layer
+ * @udata:     user command/response buffers; only used when pd->uobject
+ *             is set
+ * @qp:        QP object to initialize
+ *
+ * Returns 0 on success or a negative errno; on failure all resources
+ * acquired here have been released.
+ */
+static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+                           struct ib_qp_init_attr *init_attr,
+                           struct ib_udata *udata, struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_resources *devr = &dev->devr;
+       struct mlx5_ib_create_qp_resp resp;
+       struct mlx5_create_qp_mbox_in *in;
+       struct mlx5_ib_create_qp ucmd;
+       int inlen = sizeof(*in);
+       int err;
+
+       mutex_init(&qp->mutex);
+       spin_lock_init(&qp->sq.lock);
+       spin_lock_init(&qp->rq.lock);
+
+       if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+               qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+       /*
+        * User QPs take the signature / scatter-to-CQE options from the
+        * user command; the command is copied again inside
+        * create_user_qp().  Kernel QPs use the file-scope wq_signature.
+        */
+       if (pd && pd->uobject) {
+               if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+                       mlx5_ib_dbg(dev, "copy failed\n");
+                       return -EFAULT;
+               }
+
+               qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
+               qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+       } else {
+               qp->wq_sig = !!wq_signature;
+       }
+
+       qp->has_rq = qp_has_rq(init_attr);
+       err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
+                         qp, (pd && pd->uobject) ? &ucmd : NULL);
+       if (err) {
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               return err;
+       }
+
+       if (pd) {
+               if (pd->uobject) {
+                       mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
+                       /*
+                        * The RQ geometry chosen by set_rq_size() must
+                        * agree with what user space asked for; this only
+                        * differs when the QP has no RQ and the request
+                        * was non-zero.
+                        */
+                       if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
+                           ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
+                               mlx5_ib_dbg(dev, "invalid rq params\n");
+                               return -EINVAL;
+                       }
+                       if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
+                               mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
+                                           ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
+                               return -EINVAL;
+                       }
+                       err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+                       if (err)
+                               mlx5_ib_dbg(dev, "err %d\n", err);
+               } else {
+                       err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+                       if (err)
+                               mlx5_ib_dbg(dev, "err %d\n", err);
+                       else
+                               qp->pa_lkey = to_mpd(pd)->pa_lkey;
+               }
+
+               if (err)
+                       return err;
+       } else {
+               /* no work queues: a bare, zeroed mailbox is enough */
+               in = mlx5_vzalloc(sizeof(*in));
+               if (!in)
+                       return -ENOMEM;
+
+               qp->create_type = MLX5_QP_EMPTY;
+       }
+
+       if (is_sqp(init_attr->qp_type))
+               qp->port = init_attr->port_num;
+
+       /*
+        * NOTE(review): to_mlx5_st() returns -EINVAL for unsupported QP
+        * types and that value is shifted into the flags here without a
+        * check - confirm callers never pass such a type.
+        */
+       in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
+                                   MLX5_QP_PM_MIGRATED << 11);
+
+       if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
+               in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+       else
+               in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+
+       if (qp->wq_sig)
+               in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+
+       /*
+        * Scatter-to-CQE is only configured for RC/UC QPs; the variant
+        * depends on whether the respective CQ uses 128-byte CQEs.  The
+        * requester side is only set up when every send WR is signaled.
+        */
+       if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
+               int rcqe_sz;
+               int scqe_sz;
+
+               rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
+               scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
+
+               if (rcqe_sz == 128)
+                       in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+               else
+                       in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+
+               if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
+                       if (scqe_sz == 128)
+                               in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+                       else
+                               in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+               }
+       }
+
+       /* RQ stride is encoded as log2(bytes) - 4, RQ size as log2(entries) */
+       if (qp->rq.wqe_cnt) {
+               in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
+               in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+       }
+
+       in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+
+       /* presumably 0x8000 flags "no send queue" - TODO confirm against PRM */
+       if (qp->sq.wqe_cnt)
+               in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+       else
+               in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+
+       /* Set default resources */
+       switch (init_attr->qp_type) {
+       case IB_QPT_XRC_TGT:
+               in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+               in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+               in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+               in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+               break;
+       case IB_QPT_XRC_INI:
+               in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+               in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+               in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+               break;
+       default:
+               if (init_attr->srq) {
+                       in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
+                       in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+               } else {
+                       in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+                       in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+               }
+       }
+
+       /* caller-supplied CQs override the defaults chosen above */
+       if (init_attr->send_cq)
+               in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+
+       if (init_attr->recv_cq)
+               in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+       in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+
+       err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
+       if (err) {
+               mlx5_ib_dbg(dev, "create qp failed\n");
+               goto err_create;
+       }
+
+       mlx5_vfree(in);
+       /* Hardware wants QPN written in big-endian order (after
+        * shifting) for send doorbell.  Precompute this value to save
+        * a little bit when posting sends.
+        */
+       qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+       qp->mqp.event = mlx5_ib_qp_event;
+
+       return 0;
+
+       /* release whichever flavour of resources was set up above */
+err_create:
+       if (qp->create_type == MLX5_QP_USER)
+               destroy_qp_user(pd, qp);
+       else if (qp->create_type == MLX5_QP_KERNEL)
+               destroy_qp_kernel(dev, qp);
+
+       mlx5_vfree(in);
+       return err;
+}
+
+/*
+ * Take the locks of a QP's send and receive CQs with interrupts disabled.
+ * Either pointer may be NULL.  When both are given, the CQ with the lower
+ * CQ number is locked first so that concurrent callers always use the
+ * same order; if both refer to the same CQ number only one lock is
+ * actually taken and the second acquisition is just annotated for sparse.
+ * Must be paired with mlx5_ib_unlock_cqs() on the same arguments.
+ */
+static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+       if (send_cq) {
+               if (recv_cq) {
+                       if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
+                               spin_lock_irq(&send_cq->lock);
+                               /* inner lock: interrupts are already off */
+                               spin_lock_nested(&recv_cq->lock,
+                                                SINGLE_DEPTH_NESTING);
+                       } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+                               spin_lock_irq(&send_cq->lock);
+                               /* same CQ: annotation only, no second lock */
+                               __acquire(&recv_cq->lock);
+                       } else {
+                               spin_lock_irq(&recv_cq->lock);
+                               spin_lock_nested(&send_cq->lock,
+                                                SINGLE_DEPTH_NESTING);
+                       }
+               } else {
+                       spin_lock_irq(&send_cq->lock);
+               }
+       } else if (recv_cq) {
+               spin_lock_irq(&recv_cq->lock);
+       }
+}
+
+/*
+ * Release the CQ locks taken by mlx5_ib_lock_cqs().  Either pointer may be
+ * NULL.  The branches mirror the lock side: the lock that was taken second
+ * (the nested one) is dropped first with plain spin_unlock(), and the lock
+ * that was taken first is dropped last with spin_unlock_irq().  When both
+ * CQs have the same CQN only send_cq->lock is really released.
+ */
+static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+       __releases(&send_cq->lock) __releases(&recv_cq->lock)
+{
+       if (send_cq) {
+               if (recv_cq) {
+                       if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
+                               spin_unlock(&recv_cq->lock);
+                               spin_unlock_irq(&send_cq->lock);
+                       } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+                               /* same CQ: only one real lock is held */
+                               __release(&recv_cq->lock);
+                               spin_unlock_irq(&send_cq->lock);
+                       } else {
+                               spin_unlock(&send_cq->lock);
+                               spin_unlock_irq(&recv_cq->lock);
+                       }
+               } else {
+                       spin_unlock_irq(&send_cq->lock);
+               }
+       } else if (recv_cq) {
+               spin_unlock_irq(&recv_cq->lock);
+       }
+}
+
+/* Return the mlx5 PD wrapper for the protection domain in qp->ibqp.pd. */
+static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
+{
+       struct ib_pd *ibpd = qp->ibqp.pd;
+
+       return to_mpd(ibpd);
+}
+
+/*
+ * Report which CQs a QP uses, based on its QP type.
+ *
+ * @qp:      QP to inspect
+ * @send_cq: out; send CQ, or NULL when the type has none
+ * @recv_cq: out; receive CQ, or NULL when the type has none
+ *
+ * XRC target QPs and unsupported/unknown types report no CQ at all; XRC
+ * initiator and REG_UMR QPs report only a send CQ.
+ */
+static void get_cqs(struct mlx5_ib_qp *qp,
+                   struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
+{
+       struct mlx5_ib_cq *scq = NULL;
+       struct mlx5_ib_cq *rcq = NULL;
+
+       switch (qp->ibqp.qp_type) {
+       case MLX5_IB_QPT_REG_UMR:
+       case IB_QPT_XRC_INI:
+               /* send side only */
+               scq = to_mcq(qp->ibqp.send_cq);
+               break;
+
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+       case IB_QPT_RC:
+       case IB_QPT_UC:
+       case IB_QPT_UD:
+       case IB_QPT_RAW_IPV6:
+       case IB_QPT_RAW_ETHERTYPE:
+               scq = to_mcq(qp->ibqp.send_cq);
+               rcq = to_mcq(qp->ibqp.recv_cq);
+               break;
+
+       case IB_QPT_XRC_TGT:
+       case IB_QPT_RAW_PACKET:
+       case IB_QPT_MAX:
+       default:
+               /* no CQs */
+               break;
+       }
+
+       *send_cq = scq;
+       *recv_cq = rcq;
+}
+
+/*
+ * Tear down a QP: move it to RESET if it is not there already, purge its
+ * completions from the CQs (kernel QPs only), destroy the hardware QP and
+ * finally release the kernel- or user-side resources according to
+ * qp->create_type.
+ *
+ * Failures of the firmware commands are only logged; the teardown goes on.
+ *
+ * NOTE(review): if the mailbox allocation fails the function returns
+ * without destroying anything, while the caller still frees the qp
+ * structure - the hardware QP is leaked in that case.
+ */
+static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+       struct mlx5_ib_cq *send_cq, *recv_cq;
+       struct mlx5_modify_qp_mbox_in *in;
+       int err;
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in)
+               return;
+
+       /*
+        * The fifth argument of mlx5_core_qp_modify() is the sqd_event
+        * flag (see __mlx5_ib_modify_qp()), not a mailbox length; no SQD
+        * event is wanted for a transition to RESET.
+        */
+       if (qp->state != IB_QPS_RESET)
+               if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
+                                       MLX5_QP_STATE_RST, in, 0, &qp->mqp))
+                       mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
+                                    qp->mqp.qpn);
+
+       get_cqs(qp, &send_cq, &recv_cq);
+
+       if (qp->create_type == MLX5_QP_KERNEL) {
+               /* drop completions that still refer to this QP */
+               mlx5_ib_lock_cqs(send_cq, recv_cq);
+               __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+                                  qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+               if (send_cq != recv_cq)
+                       __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+               mlx5_ib_unlock_cqs(send_cq, recv_cq);
+       }
+
+       err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
+       if (err)
+               mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
+       kfree(in);
+
+       if (qp->create_type == MLX5_QP_KERNEL)
+               destroy_qp_kernel(dev, qp);
+       else if (qp->create_type == MLX5_QP_USER)
+               destroy_qp_user(&get_pd(qp)->ibpd, qp);
+}
+
+/*
+ * Map a QP type to its symbolic name for log messages.  IB_QPT_MAX and
+ * any value not listed yield "Invalid QP type".
+ */
+static const char *ib_qp_type_str(enum ib_qp_type type)
+{
+       const char *name;
+
+       switch (type) {
+       case IB_QPT_SMI:
+               name = "IB_QPT_SMI";
+               break;
+       case IB_QPT_GSI:
+               name = "IB_QPT_GSI";
+               break;
+       case IB_QPT_RC:
+               name = "IB_QPT_RC";
+               break;
+       case IB_QPT_UC:
+               name = "IB_QPT_UC";
+               break;
+       case IB_QPT_UD:
+               name = "IB_QPT_UD";
+               break;
+       case IB_QPT_RAW_IPV6:
+               name = "IB_QPT_RAW_IPV6";
+               break;
+       case IB_QPT_RAW_ETHERTYPE:
+               name = "IB_QPT_RAW_ETHERTYPE";
+               break;
+       case IB_QPT_XRC_INI:
+               name = "IB_QPT_XRC_INI";
+               break;
+       case IB_QPT_XRC_TGT:
+               name = "IB_QPT_XRC_TGT";
+               break;
+       case IB_QPT_RAW_PACKET:
+               name = "IB_QPT_RAW_PACKET";
+               break;
+       case MLX5_IB_QPT_REG_UMR:
+               name = "MLX5_IB_QPT_REG_UMR";
+               break;
+       case IB_QPT_MAX:
+       default:
+               name = "Invalid QP type";
+               break;
+       }
+
+       return name;
+}
+
+/*
+ * Create a QP of the type requested in @init_attr.
+ *
+ * @pd:        protection domain; may be NULL only for XRC target and
+ *             REG_UMR QPs, in which case the device is looked up through
+ *             init_attr->xrcd
+ * @init_attr: creation attributes; for XRC QPs recv_cq (and for XRC target
+ *             QPs also send_cq) is cleared here before the QP is built
+ * @udata:     passed through to create_qp_common()
+ *
+ * Returns the new ib_qp, or an ERR_PTR: -EINVAL for a missing PD/XRCD or
+ * an unsupported QP type, -ENOSYS when XRC is requested but not supported
+ * by the device, -ENOMEM on allocation failure, or the error returned by
+ * create_qp_common().
+ */
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+                               struct ib_qp_init_attr *init_attr,
+                               struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev;
+       struct mlx5_ib_qp *qp;
+       u16 xrcdn = 0;
+       int err;
+
+       if (pd) {
+               dev = to_mdev(pd->device);
+       } else {
+               /* being cautious here */
+               if (init_attr->qp_type != IB_QPT_XRC_TGT &&
+                   init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
+                       pr_warn("%s: no PD for transport %s\n", __func__,
+                               ib_qp_type_str(init_attr->qp_type));
+                       return ERR_PTR(-EINVAL);
+               }
+               /* without a PD the XRCD is the only way to find the device */
+               if (!init_attr->xrcd)
+                       return ERR_PTR(-EINVAL);
+               dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+       }
+
+       switch (init_attr->qp_type) {
+       case IB_QPT_XRC_TGT:
+       case IB_QPT_XRC_INI:
+               if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+                       mlx5_ib_dbg(dev, "XRC not supported\n");
+                       return ERR_PTR(-ENOSYS);
+               }
+               init_attr->recv_cq = NULL;
+               if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+                       xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+                       init_attr->send_cq = NULL;
+               }
+
+               /* fall through */
+       case IB_QPT_RC:
+       case IB_QPT_UC:
+       case IB_QPT_UD:
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
+       case MLX5_IB_QPT_REG_UMR:
+               qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+               if (!qp)
+                       return ERR_PTR(-ENOMEM);
+
+               err = create_qp_common(dev, pd, init_attr, udata, qp);
+               if (err) {
+                       mlx5_ib_dbg(dev, "create_qp_common failed\n");
+                       kfree(qp);
+                       return ERR_PTR(err);
+               }
+
+               /* QP0/QP1 keep their well-known numbers towards the verbs layer */
+               if (is_qp0(init_attr->qp_type))
+                       qp->ibqp.qp_num = 0;
+               else if (is_qp1(init_attr->qp_type))
+                       qp->ibqp.qp_num = 1;
+               else
+                       qp->ibqp.qp_num = qp->mqp.qpn;
+
+               /*
+                * The CQ pointers are NULL for XRC QPs (cleared above), so
+                * they must not be dereferenced unconditionally; print -1
+                * for a missing CQ.
+                */
+               mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
+                           qp->ibqp.qp_num, qp->mqp.qpn,
+                           init_attr->recv_cq ?
+                           to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+                           init_attr->send_cq ?
+                           to_mcq(init_attr->send_cq)->mcq.cqn : -1);
+
+               qp->xrcdn = xrcdn;
+
+               break;
+
+       case IB_QPT_RAW_IPV6:
+       case IB_QPT_RAW_ETHERTYPE:
+       case IB_QPT_RAW_PACKET:
+       case IB_QPT_MAX:
+       default:
+               mlx5_ib_dbg(dev, "unsupported qp type %d\n",
+                           init_attr->qp_type);
+               /* Don't support raw QPs */
+               return ERR_PTR(-EINVAL);
+       }
+
+       return &qp->ibqp;
+}
+
+/*
+ * Destroy a QP created by mlx5_ib_create_qp(): tear it down through
+ * destroy_qp_common() and free the driver's QP structure.  Always
+ * returns 0.
+ */
+int mlx5_ib_destroy_qp(struct ib_qp *qp)
+{
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
+
+       destroy_qp_common(to_mdev(qp->device), mqp);
+       kfree(mqp);
+
+       return 0;
+}
+
+/*
+ * Build the big-endian RRE/RAE/RWE access bits for a modify-QP context.
+ *
+ * The responder depth and the access flags are taken from @attr when the
+ * corresponding bit is set in @attr_mask, otherwise from the values cached
+ * in @qp.  With a responder depth of zero only remote write survives.
+ */
+static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
+                                  int attr_mask)
+{
+       u8 dest_rd_atomic = (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
+                           attr->max_dest_rd_atomic : qp->resp_depth;
+       u32 access_flags = (attr_mask & IB_QP_ACCESS_FLAGS) ?
+                          attr->qp_access_flags : qp->atomic_rd_en;
+       u32 hw_access_flags = 0;
+
+       /* no responder resources: remote read and atomics are masked out */
+       if (dest_rd_atomic == 0)
+               access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+       if (access_flags & IB_ACCESS_REMOTE_WRITE)
+               hw_access_flags |= MLX5_QP_BIT_RWE;
+       if (access_flags & IB_ACCESS_REMOTE_READ)
+               hw_access_flags |= MLX5_QP_BIT_RRE;
+       if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+               hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+
+       return cpu_to_be32(hw_access_flags);
+}
+
+/* Bits accepted in the path_flags argument of mlx5_set_path(). */
+enum {
+       MLX5_PATH_FLAG_FL       = 1 << 0, /* makes mlx5_set_path() set path->fl */
+       MLX5_PATH_FLAG_FREE_AR  = 1 << 1, /* makes mlx5_set_path() set path->free_ar */
+       MLX5_PATH_FLAG_COUNTER  = 1 << 2, /* NOTE(review): not consumed by mlx5_set_path() */
+};
+
+/*
+ * Convert an IB static rate into the value stored in a QP path.
+ *
+ * Returns 0 for IB_RATE_PORT_CURRENT and -EINVAL for a rate outside
+ * [IB_RATE_2_5_GBPS, IB_RATE_300_GBPS].  Otherwise the rate is lowered
+ * until its bit is set in the device's stat_rate_support mask (stopping
+ * at IB_RATE_2_5_GBPS at the latest) and returned with
+ * MLX5_STAT_RATE_OFFSET added.
+ */
+static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+{
+       if (rate == IB_RATE_PORT_CURRENT)
+               return 0;
+
+       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
+               return -EINVAL;
+
+       /* step down to the nearest rate the device supports */
+       for (; rate != IB_RATE_2_5_GBPS; --rate) {
+               if ((1 << (rate + MLX5_STAT_RATE_OFFSET)) &
+                   dev->mdev.caps.stat_rate_support)
+                       break;
+       }
+
+       return rate + MLX5_STAT_RATE_OFFSET;
+}
+
+/*
+ * Fill a QP path (primary or alternate) from an address handle attribute.
+ *
+ * @dev:        device the QP belongs to
+ * @ah:         address attributes to translate
+ * @path:       path structure to fill
+ * @port:       1-based port number the path uses
+ * @attr_mask:  modify-QP mask; IB_QP_PKEY_INDEX and IB_QP_TIMEOUT select
+ *              whether pkey_index and timeout are taken from @attr
+ * @path_flags: MLX5_PATH_FLAG_* bits
+ * @attr:       QP attributes providing pkey_index and timeout
+ *
+ * Returns 0 on success, or -EINVAL when the static rate is out of range or
+ * the source GID index exceeds the port's GID table.
+ */
+static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+                        struct mlx5_qp_path *path, u8 port, int attr_mask,
+                        u32 path_flags, const struct ib_qp_attr *attr)
+{
+       int err;
+
+       path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+       path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
+
+       if (attr_mask & IB_QP_PKEY_INDEX)
+               path->pkey_index = attr->pkey_index;
+
+       path->grh_mlid  = ah->src_path_bits & 0x7f;
+       path->rlid      = cpu_to_be16(ah->dlid);
+
+       /* a negative value is an error, anything else the encoded rate */
+       err = ib_rate_to_mlx5(dev, ah->static_rate);
+       if (err < 0)
+               return err;
+       path->static_rate = err;
+       path->port = port;
+
+       /*
+        * The GRH fields are written only here, after sgid_index has been
+        * checked against the port's GID table.
+        */
+       if (ah->ah_flags & IB_AH_GRH) {
+               if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
+                       pr_err("sgid_index (%u) too large. max is %d\n",
+                              ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
+                       return -EINVAL;
+               }
+
+               path->grh_mlid |= 1 << 7;
+               path->mgid_index = ah->grh.sgid_index;
+               path->hop_limit  = ah->grh.hop_limit;
+               path->tclass_flowlabel =
+                       cpu_to_be32((ah->grh.traffic_class << 20) |
+                                   (ah->grh.flow_label));
+               memcpy(path->rgid, ah->grh.dgid.raw, 16);
+       }
+
+       if (attr_mask & IB_QP_TIMEOUT)
+               path->ackto_lt = attr->timeout << 3;
+
+       path->sl = ah->sl & 0xf;
+
+       return 0;
+}
+
+/*
+ * Optional-parameter bits that may accompany a QP state transition.
+ * Indexed as opt_mask[current state][new state][transport type].
+ * __mlx5_ib_modify_qp() ANDs the bits derived from the caller's attribute
+ * mask with the matching entry.  Combinations not listed here are
+ * zero-initialized, i.e. allow no optional parameter.
+ */
+static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
+       [MLX5_QP_STATE_INIT] = {
+               [MLX5_QP_STATE_INIT] = {
+                       [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PKEY_INDEX     |
+                                         MLX5_QP_OPTPAR_PRI_PORT,
+                       [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PKEY_INDEX     |
+                                         MLX5_QP_OPTPAR_PRI_PORT,
+                       [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
+                                         MLX5_QP_OPTPAR_Q_KEY          |
+                                         MLX5_QP_OPTPAR_PRI_PORT,
+               },
+               [MLX5_QP_STATE_RTR] = {
+                       [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+                                         MLX5_QP_OPTPAR_RRE            |
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PKEY_INDEX,
+                       [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PKEY_INDEX,
+                       [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
+                                         MLX5_QP_OPTPAR_Q_KEY,
+                       [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
+                                          MLX5_QP_OPTPAR_Q_KEY,
+               },
+       },
+       [MLX5_QP_STATE_RTR] = {
+               [MLX5_QP_STATE_RTS] = {
+                       [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+                                         MLX5_QP_OPTPAR_RRE            |
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PM_STATE       |
+                                         MLX5_QP_OPTPAR_RNR_TIMEOUT,
+                       [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PM_STATE,
+                       [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+               },
+       },
+       [MLX5_QP_STATE_RTS] = {
+               [MLX5_QP_STATE_RTS] = {
+                       [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_RNR_TIMEOUT    |
+                                         MLX5_QP_OPTPAR_PM_STATE,
+                       [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PM_STATE,
+                       [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
+                                         MLX5_QP_OPTPAR_SRQN           |
+                                         MLX5_QP_OPTPAR_CQN_RCV,
+               },
+       },
+       [MLX5_QP_STATE_SQER] = {
+               [MLX5_QP_STATE_RTS] = {
+                       [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
+                       [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+               },
+       },
+};
+
+/*
+ * Translate a single IB_QP_* attribute bit into the MLX5_QP_OPTPAR_* bits
+ * it corresponds to.  Attributes without an optional-parameter
+ * counterpart, and unknown values, translate to 0.
+ */
+static int ib_nr_to_mlx5_nr(int ib_mask)
+{
+       switch (ib_mask) {
+       case IB_QP_ACCESS_FLAGS:
+               return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
+                       MLX5_QP_OPTPAR_RAE;
+       case IB_QP_PKEY_INDEX:
+               return MLX5_QP_OPTPAR_PKEY_INDEX;
+       case IB_QP_PORT:
+               return MLX5_QP_OPTPAR_PRI_PORT;
+       case IB_QP_QKEY:
+               return MLX5_QP_OPTPAR_Q_KEY;
+       case IB_QP_AV:
+               return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
+                       MLX5_QP_OPTPAR_PRI_PORT;
+       case IB_QP_TIMEOUT:
+               return MLX5_QP_OPTPAR_ACK_TIMEOUT;
+       case IB_QP_RETRY_CNT:
+               return MLX5_QP_OPTPAR_RETRY_COUNT;
+       case IB_QP_RNR_RETRY:
+               return MLX5_QP_OPTPAR_RNR_RETRY;
+       case IB_QP_MAX_QP_RD_ATOMIC:
+               return MLX5_QP_OPTPAR_SRA_MAX;
+       case IB_QP_ALT_PATH:
+               return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
+       case IB_QP_MIN_RNR_TIMER:
+               return MLX5_QP_OPTPAR_RNR_TIMEOUT;
+       case IB_QP_MAX_DEST_RD_ATOMIC:
+               return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
+                       MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
+       case IB_QP_PATH_MIG_STATE:
+               return MLX5_QP_OPTPAR_PM_STATE;
+
+       /* attributes that map to no optional parameter */
+       case IB_QP_STATE:
+       case IB_QP_CUR_STATE:
+       case IB_QP_EN_SQD_ASYNC_NOTIFY:
+       case IB_QP_PATH_MTU:
+       case IB_QP_RQ_PSN:
+       case IB_QP_SQ_PSN:
+       case IB_QP_CAP:
+       case IB_QP_DEST_QPN:
+               return 0;
+       }
+
+       return 0;
+}
+
+/*
+ * Translate a mask of IB_QP_* attribute bits into the union of the
+ * MLX5_QP_OPTPAR_* bits that ib_nr_to_mlx5_nr() returns for each bit set.
+ */
+static int ib_mask_to_mlx5_opt(int ib_mask)
+{
+       unsigned int mask = ib_mask;
+       unsigned int bit;
+       int result = 0;
+
+       /*
+        * Walk the bits with an unsigned probe: shifting a signed 1 into
+        * the sign bit (1 << 31) is undefined behaviour.  The loop ends
+        * when the probe bit is shifted out.
+        */
+       for (bit = 1; bit; bit <<= 1) {
+               if (mask & bit)
+                       result |= ib_nr_to_mlx5_nr(bit);
+       }
+
+       return result;
+}
+
+/*
+ * Build a modify-QP mailbox from @attr / @attr_mask, issue the state
+ * transition @cur_state -> @new_state and, on success, update the state
+ * cached in the driver's QP structure.
+ *
+ * @ibqp:      QP to modify
+ * @attr:      new attribute values
+ * @attr_mask: IB_QP_* bits telling which fields of @attr are valid
+ * @cur_state: state the QP is moved from
+ * @new_state: state the QP is moved to
+ *
+ * Returns 0 on success, -ENOMEM if the mailbox cannot be allocated,
+ * -EINVAL for an invalid MTU, state or transport type, or the error
+ * returned by to_mlx5_st(), mlx5_set_path() or mlx5_core_qp_modify().
+ */
+static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
+                              const struct ib_qp_attr *attr, int attr_mask,
+                              enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_ib_cq *send_cq, *recv_cq;
+       struct mlx5_qp_context *context;
+       struct mlx5_modify_qp_mbox_in *in;
+       struct mlx5_ib_pd *pd;
+       enum mlx5_qp_state mlx5_cur, mlx5_new;
+       enum mlx5_qp_optpar optpar;
+       int sqd_event;
+       int mlx5_st;
+       int err;
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       context = &in->ctx;
+       /* err temporarily carries the transport type (negative on error) */
+       err = to_mlx5_st(ibqp->qp_type);
+       if (err < 0)
+               goto out;
+
+       context->flags = cpu_to_be32(err << 16);
+
+       /* path migration state defaults to MIGRATED when not specified */
+       if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
+               context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+       } else {
+               switch (attr->path_mig_state) {
+               case IB_MIG_MIGRATED:
+                       context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+                       break;
+               case IB_MIG_REARM:
+                       context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
+                       break;
+               case IB_MIG_ARMED:
+                       context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
+                       break;
+               }
+       }
+
+       /* special QPs, UD and UMR QPs use fixed MTU / max message sizes */
+       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+               context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
+       } else if (ibqp->qp_type == IB_QPT_UD ||
+                  ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
+               context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+       } else if (attr_mask & IB_QP_PATH_MTU) {
+               if (attr->path_mtu < IB_MTU_256 ||
+                   attr->path_mtu > IB_MTU_4096) {
+                       mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
+                       err = -EINVAL;
+                       goto out;
+               }
+               context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
+       }
+
+       if (attr_mask & IB_QP_DEST_QPN)
+               context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
+
+       if (attr_mask & IB_QP_PKEY_INDEX)
+               context->pri_path.pkey_index = attr->pkey_index;
+
+       /* todo implement counter_index functionality */
+
+       if (is_sqp(ibqp->qp_type))
+               context->pri_path.port = qp->port;
+
+       if (attr_mask & IB_QP_PORT)
+               context->pri_path.port = attr->port_num;
+
+       if (attr_mask & IB_QP_AV) {
+               err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+                                   attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
+                                   attr_mask, 0, attr);
+               if (err)
+                       goto out;
+       }
+
+       if (attr_mask & IB_QP_TIMEOUT)
+               context->pri_path.ackto_lt |= attr->timeout << 3;
+
+       if (attr_mask & IB_QP_ALT_PATH) {
+               err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+                                   attr->alt_port_num, attr_mask, 0, attr);
+               if (err)
+                       goto out;
+       }
+
+       pd = get_pd(qp);
+       get_cqs(qp, &send_cq, &recv_cq);
+
+       /* QPs without a PD of their own use the device's p0 */
+       context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
+       context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
+       context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
+       context->params1  = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
+
+       if (attr_mask & IB_QP_RNR_RETRY)
+               context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+
+       if (attr_mask & IB_QP_RETRY_CNT)
+               context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+               /* stored as the log2 of the value rounded up to a power of two */
+               if (attr->max_rd_atomic)
+                       context->params1 |=
+                               cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+       }
+
+       if (attr_mask & IB_QP_SQ_PSN)
+               context->next_send_psn = cpu_to_be32(attr->sq_psn);
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+               if (attr->max_dest_rd_atomic)
+                       context->params2 |=
+                               cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+       }
+
+       if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+               context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+
+       if (attr_mask & IB_QP_MIN_RNR_TIMER)
+               context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+
+       if (attr_mask & IB_QP_RQ_PSN)
+               context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+       if (attr_mask & IB_QP_QKEY)
+               context->qkey = cpu_to_be32(attr->qkey);
+
+       if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+               context->db_rec_addr = cpu_to_be64(qp->db.dma);
+
+       if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
+           attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
+               sqd_event = 1;
+       else
+               sqd_event = 0;
+
+       /* kernel QPs only (no uobject), on RESET -> INIT */
+       if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+               context->sq_crq_size |= cpu_to_be16(1 << 4);
+
+       mlx5_cur = to_mlx5_state(cur_state);
+       mlx5_new = to_mlx5_state(new_state);
+       mlx5_st = to_mlx5_st(ibqp->qp_type);
+       if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) {
+               /*
+                * err still holds a non-negative value from the steps
+                * above; report a real error instead of leaking it.
+                */
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* keep only the optional parameters legal for this transition */
+       optpar = ib_mask_to_mlx5_opt(attr_mask);
+       optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
+       in->optparam = cpu_to_be32(optpar);
+       err = mlx5_core_qp_modify(&dev->mdev, mlx5_cur, mlx5_new, in,
+                                 sqd_event, &qp->mqp);
+       if (err)
+               goto out;
+
+       qp->state = new_state;
+
+       /* cache the values later modify calls fall back to */
+       if (attr_mask & IB_QP_ACCESS_FLAGS)
+               qp->atomic_rd_en = attr->qp_access_flags;
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+               qp->resp_depth = attr->max_dest_rd_atomic;
+       if (attr_mask & IB_QP_PORT)
+               qp->port = attr->port_num;
+       if (attr_mask & IB_QP_ALT_PATH)
+               qp->alt_port = attr->alt_port_num;
+
+       /*
+        * If we moved a kernel QP to RESET, clean up all old CQ
+        * entries and reinitialize the QP.
+        */
+       if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+               mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+                                ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+               if (send_cq != recv_cq)
+                       mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+               qp->rq.head = 0;
+               qp->rq.tail = 0;
+               qp->sq.head = 0;
+               qp->sq.tail = 0;
+               qp->sq.cur_post = 0;
+               qp->sq.last_poll = 0;
+               qp->db.db[MLX5_RCV_DBR] = 0;
+               qp->db.db[MLX5_SND_DBR] = 0;
+       }
+
+out:
+       kfree(in);
+       return err;
+}
+
+/*
+ * Modify-QP entry point: validate the request under the QP mutex and hand
+ * it to __mlx5_ib_modify_qp().
+ *
+ * Returns -EINVAL when the transition or one of the checked attributes
+ * (port number, P_Key index, initiator/responder depth) is invalid, 0 for
+ * a RESET -> RESET request (nothing is done), otherwise the result of
+ * __mlx5_ib_modify_qp().
+ */
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       enum ib_qp_state cur_state, new_state;
+       int err = -EINVAL;
+       int port;
+
+       mutex_lock(&qp->mutex);
+
+       /* states not supplied by the caller default to the tracked ones */
+       cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : qp->state;
+       new_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
+
+       /* REG_UMR QPs are exempt from the generic transition check */
+       if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
+           !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+               goto out;
+
+       if (attr_mask & IB_QP_PORT) {
+               if (attr->port_num == 0 ||
+                   attr->port_num > dev->mdev.caps.num_ports)
+                       goto out;
+       }
+
+       if (attr_mask & IB_QP_PKEY_INDEX) {
+               port = (attr_mask & IB_QP_PORT) ? attr->port_num : qp->port;
+               if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
+                       goto out;
+       }
+
+       if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) &&
+           attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
+               goto out;
+
+       if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+           attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
+               goto out;
+
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_RESET) {
+               /* nothing to do */
+               err = 0;
+               goto out;
+       }
+
+       err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+       mutex_unlock(&qp->mutex);
+       return err;
+}
+
+/*
+ * Check whether posting @nreq more entries would overflow work queue @wq.
+ *
+ * A first check is done without any lock.  Only when that one fails are
+ * head and tail read again under the lock of @ib_cq before deciding.
+ *
+ * Returns non-zero if the queue would overflow, 0 otherwise.
+ */
+static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+       struct mlx5_ib_cq *cq;
+       unsigned used = wq->head - wq->tail;
+
+       /* fast path: enough room */
+       if (likely(used + nreq < wq->max_post))
+               return 0;
+
+       /* slow path: re-read under the CQ lock */
+       cq = to_mcq(ib_cq);
+       spin_lock(&cq->lock);
+       used = wq->head - wq->tail;
+       spin_unlock(&cq->lock);
+
+       return used + nreq >= wq->max_post;
+}
+
+/*
+ * Fill a remote-address WQE segment with @remote_addr and @rkey in
+ * big-endian form; the reserved field is zeroed.
+ */
+static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
+                                         u64 remote_addr, u32 rkey)
+{
+       rseg->reserved = 0;
+       rseg->rkey     = cpu_to_be32(rkey);
+       rseg->raddr    = cpu_to_be64(remote_addr);
+}
+
+/*
+ * Fill an atomic WQE segment from a send work request.
+ *
+ * Compare-and-swap takes swap/compare_add, masked fetch-and-add takes
+ * compare_add/compare_add_mask, and every other opcode takes compare_add
+ * with a zero compare field.
+ */
+static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+       u64 swap_add = wr->wr.atomic.compare_add;
+       u64 compare = 0;
+
+       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               swap_add = wr->wr.atomic.swap;
+               compare = wr->wr.atomic.compare_add;
+       } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+               compare = wr->wr.atomic.compare_add_mask;
+       }
+
+       aseg->swap_add = cpu_to_be64(swap_add);
+       aseg->compare  = cpu_to_be64(compare);
+}
+
+/*
+ * Fill a masked-atomic WQE segment: the swap and compare operands and
+ * their masks are copied from the work request in big-endian form.
+ */
+static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
+                                 struct ib_send_wr *wr)
+{
+       /* operands */
+       aseg->swap_add          = cpu_to_be64(wr->wr.atomic.swap);
+       aseg->compare           = cpu_to_be64(wr->wr.atomic.compare_add);
+
+       /* masks */
+       aseg->swap_add_mask     = cpu_to_be64(wr->wr.atomic.swap_mask);
+       aseg->compare_mask      = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
+
+/*
+ * Fill a datagram WQE segment: copy the address vector of the work
+ * request's address handle, then overwrite the destination QPN (with
+ * MLX5_EXTENDED_UD_AV set) and the Q_Key from the work request.
+ */
+static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
+                            struct ib_send_wr *wr)
+{
+       struct mlx5_ib_ah *mah = to_mah(wr->wr.ud.ah);
+
+       memcpy(&dseg->av, &mah->av, sizeof(struct mlx5_av));
+
+       /* per-request fields replace what was copied from the AH */
+       dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+       dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
+}
+
+/*
+ * Fill a data-pointer WQE segment from a scatter/gather element,
+ * converting address, lkey and length to big-endian.
+ */
+static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+       dseg->addr       = cpu_to_be64(sg->addr);
+       dseg->lkey       = cpu_to_be32(sg->lkey);
+       dseg->byte_count = cpu_to_be32(sg->length);
+}
+
+/*
+ * Return, in big-endian form, @npages rounded up to a multiple of 8 and
+ * then halved.
+ */
+static __be16 get_klm_octo(int npages)
+{
+       int aligned = ALIGN(npages, 8);
+
+       return cpu_to_be16(aligned / 2);
+}
+
+/*
+ * Big-endian mkey field mask used for fast-register UMR work requests
+ * (see set_frwr_umr_segment()).
+ */
+static __be64 frwr_mkey_mask(void)
+{
+       return cpu_to_be64(MLX5_MKEY_MASK_LEN           |
+                          MLX5_MKEY_MASK_PAGE_SIZE     |
+                          MLX5_MKEY_MASK_START_ADDR    |
+                          MLX5_MKEY_MASK_EN_RINVAL     |
+                          MLX5_MKEY_MASK_KEY           |
+                          MLX5_MKEY_MASK_LR            |
+                          MLX5_MKEY_MASK_LW            |
+                          MLX5_MKEY_MASK_RR            |
+                          MLX5_MKEY_MASK_RW            |
+                          MLX5_MKEY_MASK_A             |
+                          MLX5_MKEY_MASK_SMALL_FENCE   |
+                          MLX5_MKEY_MASK_FREE);
+}
+
+/*
+ * Build the UMR control segment for a fast-register (@li == 0) or a
+ * local-invalidate (@li != 0) work request.  The segment is zeroed first.
+ */
+static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+                                struct ib_send_wr *wr, int li)
+{
+       memset(umr, 0, sizeof(*umr));
+
+       if (!li) {
+               umr->flags = (1 << 5); /* fail if not free */
+               umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+               umr->mkey_mask = frwr_mkey_mask();
+       } else {
+               /* local invalidate: only the free field is updated */
+               umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+               umr->flags = 1 << 7;
+       }
+}
+
+/*
+ * Build the UMR control segment for a driver-internal register or
+ * unregister request; MLX5_IB_SEND_UMR_UNREG in wr->send_flags selects
+ * unregister.  The fast_reg part of the work request is read as a
+ * struct umr_wr.  The inline flag is set when the request has no SGEs.
+ */
+static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+                               struct ib_send_wr *wr)
+{
+       struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
+       u64 mask;
+
+       memset(umr, 0, sizeof(*umr));
+
+       if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+               umr->flags = 2 << 5; /* fail if free */
+               mask = MLX5_MKEY_MASK_FREE;
+       } else {
+               umr->flags = 1 << 5; /* fail if not free */
+               umr->klm_octowords = get_klm_octo(umrwr->npages);
+               mask =  MLX5_MKEY_MASK_LEN              |
+                       MLX5_MKEY_MASK_PAGE_SIZE        |
+                       MLX5_MKEY_MASK_START_ADDR       |
+                       MLX5_MKEY_MASK_PD               |
+                       MLX5_MKEY_MASK_LR               |
+                       MLX5_MKEY_MASK_LW               |
+                       MLX5_MKEY_MASK_RR               |
+                       MLX5_MKEY_MASK_RW               |
+                       MLX5_MKEY_MASK_A                |
+                       MLX5_MKEY_MASK_FREE;
+       }
+       umr->mkey_mask = cpu_to_be64(mask);
+
+       if (wr->num_sge == 0)
+               umr->flags |= (1 << 7); /* inline */
+}
+
+/*
+ * Convert IB access flags into MLX5_PERM_* mkey permission bits.  Local
+ * read, UMR enable and MTT access mode are always set.
+ */
+static u8 get_umr_flags(int acc)
+{
+       u8 flags = MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+
+       if (acc & IB_ACCESS_REMOTE_ATOMIC)
+               flags |= MLX5_PERM_ATOMIC;
+       if (acc & IB_ACCESS_REMOTE_WRITE)
+               flags |= MLX5_PERM_REMOTE_WRITE;
+       if (acc & IB_ACCESS_REMOTE_READ)
+               flags |= MLX5_PERM_REMOTE_READ;
+       if (acc & IB_ACCESS_LOCAL_WRITE)
+               flags |= MLX5_PERM_LOCAL_WRITE;
+
+       return flags;
+}
+
+/*
+ * Fill the mkey segment of a fast-register or local-invalidate WQE.
+ *
+ * @seg:  mkey segment inside the WQE; it is zeroed first
+ * @wr:   work request; its fast_reg fields are read when @li is zero
+ * @li:   non-zero for a local invalidate, in which case only the status
+ *        byte is written (NOTE(review): 1 << 6 is presumably the "free"
+ *        status bit - confirm against the device spec)
+ * @writ: set to non-zero when the new mapping grants local or remote
+ *        write permission; left untouched when @li is non-zero
+ */
+static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
+                            int li, int *writ)
+{
+       memset(seg, 0, sizeof(*seg));
+       if (li) {
+               seg->status = 1 << 6;
+               return;
+       }
+
+       seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
+       /*
+        * seg->flags holds MLX5_PERM_* bits, so it must be tested against
+        * MLX5_PERM_* masks.  An IB_ACCESS_* constant belongs to the verbs
+        * bit namespace and would test an unrelated bit here.
+        */
+       *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | MLX5_PERM_REMOTE_WRITE);
+       seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+       seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+       seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+       /* half the page count, rounded up */
+       seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
+       seg->log2_page_size = wr->wr.fast_reg.page_shift;
+}
+
+/*
+ * Fill the mkey segment for a MLX5_IB_WR_UMR request.  With
+ * MLX5_IB_SEND_UMR_UNREG only the status byte is written; otherwise the
+ * access flags, PD number, address range and page size are programmed.
+ */
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+{
+       struct ib_pd *pd;
+
+       memset(seg, 0, sizeof(*seg));
+       if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+               seg->status = 1 << 6;
+               return;
+       }
+
+       /* for this request type the page_list pointer carries the PD */
+       pd = (struct ib_pd *)wr->wr.fast_reg.page_list;
+
+       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       seg->log2_page_size = wr->wr.fast_reg.page_shift;
+       seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+       seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+       seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+       seg->flags = convert_access(wr->wr.fast_reg.access_flags);
+}
+
+/*
+ * Copy the caller's page list into the mapped (big-endian) page list,
+ * OR-ing in read and, when @writ is set, write enable bits, then point
+ * the data segment @dseg at that mapped list using the PD's pa_lkey.
+ * @mdev is not used.
+ */
+static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
+                          struct ib_send_wr *wr,
+                          struct mlx5_core_dev *mdev,
+                          struct mlx5_ib_pd *pd,
+                          int writ)
+{
+       struct mlx5_ib_fast_reg_page_list *frpl = to_mfrpl(wr->wr.fast_reg.page_list);
+       u64 *src = wr->wr.fast_reg.page_list->page_list;
+       u64 en_bits = MLX5_EN_RD;
+       int i = 0;
+
+       if (writ)
+               en_bits |= MLX5_EN_WR;
+
+       while (i < wr->wr.fast_reg.page_list_len) {
+               frpl->mapped_page_list[i] = cpu_to_be64(src[i] | en_bits);
+               i++;
+       }
+
+       dseg->lkey = cpu_to_be32(pd->pa_lkey);
+       /* the list length is rounded up to a multiple of 64 bytes */
+       dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+       dseg->addr = cpu_to_be64(frpl->map);
+}
+
+/*
+ * Return the value for the control segment's imm field: the immediate
+ * data for the *_WITH_IMM opcodes (used as stored in the request), the
+ * rkey to invalidate for SEND_WITH_INV, and zero for any other opcode.
+ */
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+       if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+           wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+               return wr->ex.imm_data;
+
+       if (wr->opcode == IB_WR_SEND_WITH_INV)
+               return cpu_to_be32(wr->ex.invalidate_rkey);
+
+       return 0;
+}
+
+/*
+ * Return the bitwise complement of the XOR of the first @size bytes at
+ * @wqe.  A @size of zero or less yields 0xff.
+ */
+static u8 calc_sig(void *wqe, int size)
+{
+       u8 *byte = wqe;
+       u8 acc = 0;
+
+       while (size-- > 0)
+               acc ^= *byte++;
+
+       return ~acc;
+}
+
+/*
+ * Compute the signature of a send WQE.  The length is taken from the low
+ * six bits of the byte at offset 8 of the WQE, in units of 16 bytes.
+ * NOTE(review): presumably that byte is the size part of the control
+ * segment's qpn_ds word - confirm against the segment layout.
+ */
+static u8 wq_sig(void *wqe)
+{
+       u8 *bytes = wqe;
+       int units = bytes[8] & 0x3f;
+
+       return calc_sig(wqe, units << 4);
+}
+
+/*
+ * Copy the payload of every scatter entry of @wr inline into the send
+ * queue, starting right behind an inline segment header placed at @wqe.
+ * A copy that would run past the end of the queue buffer is split and
+ * continued at the first WQE of the queue.
+ *
+ * On success *@sz receives the size of header plus data in 16-byte units
+ * and 0 is returned.  Returns -ENOMEM once the accumulated length exceeds
+ * qp->max_inline_data; *@sz is not written in that case.
+ */
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+                           void *wqe, int *sz)
+{
+       struct mlx5_wqe_inline_seg *hdr = wqe;
+       void *end = qp->sq.qend;
+       void *src;
+       int total = 0;
+       int chunk;
+       int room;
+       int i;
+
+       wqe += sizeof(*hdr);
+       for (i = 0; i < wr->num_sge; i++) {
+               src   = (void *)(unsigned long)(wr->sg_list[i].addr);
+               chunk = wr->sg_list[i].length;
+               total += chunk;
+
+               if (unlikely(total > qp->max_inline_data))
+                       return -ENOMEM;
+
+               if (unlikely(wqe + chunk > end)) {
+                       /* fill up to the end of the buffer, then wrap */
+                       room = end - wqe;
+                       memcpy(wqe, src, room);
+                       src += room;
+                       chunk -= room;
+                       wqe = mlx5_get_send_wqe(qp, 0);
+               }
+               memcpy(wqe, src, chunk);
+               wqe += chunk;
+       }
+
+       hdr->byte_count = cpu_to_be32(total | MLX5_INLINE_SEG);
+
+       *sz = ALIGN(total + sizeof(hdr->byte_count), 16) / 16;
+
+       return 0;
+}
+
+/*
+ * Emit the segments of a fast-register or local-invalidate request: a
+ * UMR control segment, an mkey segment and, for fast-register only, a
+ * data segment describing the page list.  *@seg and *@size are advanced
+ * past every segment written, and *@seg wraps to the first WQE when it
+ * reaches the end of the send queue after the first two segments.
+ *
+ * Returns 0, or -EINVAL when the request has IB_SEND_INLINE set.
+ */
+static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
+                         struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+       int is_inv = (wr->opcode == IB_WR_LOCAL_INV);
+       int wr_perm = 0;
+
+       if (unlikely(wr->send_flags & IB_SEND_INLINE))
+               return -EINVAL;
+
+       set_frwr_umr_segment(*seg, wr, is_inv);
+       *seg  += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+       *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+       if (unlikely(*seg == qp->sq.qend))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       set_mkey_segment(*seg, wr, is_inv, &wr_perm);
+       *seg  += sizeof(struct mlx5_mkey_seg);
+       *size += sizeof(struct mlx5_mkey_seg) / 16;
+       if (unlikely(*seg == qp->sq.qend))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       if (is_inv)
+               return 0;
+
+       set_frwr_pages(*seg, wr, mdev, pd, wr_perm);
+       *seg  += sizeof(struct mlx5_wqe_data_seg);
+       *size += sizeof(struct mlx5_wqe_data_seg) / 16;
+
+       return 0;
+}
+
+/*
+ * Debug helper: print @size_16 16-byte units of a send WQE starting at
+ * queue index @idx, four 32-bit words per line.  After every sixteen
+ * words the dump moves on to the next queue index, wrapping at wqe_cnt.
+ */
+static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+{
+       __be32 *word = NULL;
+       int next = idx;
+       int i;
+
+       pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, next));
+       for (i = 0; i < size_16 * 4; i += 4) {
+               if (!(i & 0xf)) {
+                       word = mlx5_get_send_wqe(qp, next);
+                       next = (next + 1) & (qp->sq.wqe_cnt - 1);
+               }
+               pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(word[0]),
+                        be32_to_cpu(word[1]), be32_to_cpu(word[2]),
+                        be32_to_cpu(word[3]));
+               word += 4;
+       }
+}
+
+/*
+ * Copy @bytecnt bytes of WQE data from the send queue at @src to the
+ * device register window at @dst, 64 bytes at a time.
+ *
+ * @bytecnt must be a multiple of 64 (the caller passes an ALIGN(.., 64)
+ * value); it is unsigned, so any other value would wrap instead of
+ * reaching zero.  When @src reaches the end of the send queue buffer it
+ * continues at the first WQE.
+ */
+static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
+                        unsigned bytecnt, struct mlx5_ib_qp *qp)
+{
+       while (bytecnt > 0) {
+               /*
+                * __iowrite64_copy() takes its count in 64-bit words, so
+                * one call with a count of 8 moves a full 64-byte chunk.
+                * Calling it eight times while stepping the pointers by a
+                * single word would access memory past the chunk.
+                */
+               __iowrite64_copy(dst, src, 8);
+               dst += 8;
+               src += 8;
+               bytecnt -= 64;
+               if (unlikely(src == qp->sq.qend))
+                       src = mlx5_get_send_wqe(qp, 0);
+       }
+}
+
+/*
+ * Pick the fence mode for a WQE.
+ *
+ * A local invalidate posted with IB_SEND_FENCE always gets strong
+ * ordering.  Otherwise the result is 0 when @fence (the mode cached from
+ * the previous request) is 0, MLX5_FENCE_MODE_SMALL_AND_FENCE when the
+ * request also has IB_SEND_FENCE set, and @fence itself in all other
+ * cases.
+ */
+static u8 get_fence(u8 fence, struct ib_send_wr *wr)
+{
+       if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
+                    wr->send_flags & IB_SEND_FENCE))
+               return MLX5_FENCE_MODE_STRONG_ORDERING;
+
+       if (likely(!fence))
+               return 0;
+
+       return (wr->send_flags & IB_SEND_FENCE) ?
+               MLX5_FENCE_MODE_SMALL_AND_FENCE : fence;
+}
+
+/*
+ * Post a chain of send work requests on @ibqp.
+ *
+ * The whole chain is processed under the send queue lock.  For every
+ * request the opcode, the free space in the queue and the number of
+ * scatter entries are checked, then a WQE is built: a control segment,
+ * the segments required by the QP type and opcode, and finally the data,
+ * either copied inline or referenced through data pointer segments.
+ *
+ * If at least one request was consumed from the chain, the doorbell
+ * record is updated and the doorbell is rung once for the whole batch.
+ *
+ * Returns 0 on success or a negative errno.  On every failure *@bad_wr
+ * points at the request that could not be posted.
+ */
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                     struct ib_send_wr **bad_wr)
+{
+       struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_core_dev *mdev = &dev->mdev;
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_wqe_data_seg *dpseg;
+       struct mlx5_wqe_xrc_seg *xrc;
+       struct mlx5_bf *bf = qp->bf;
+       int uninitialized_var(size);
+       void *qend = qp->sq.qend;
+       unsigned long flags;
+       u32 mlx5_opcode;
+       unsigned idx;
+       int err = 0;
+       int inl = 0;
+       int num_sge;
+       void *seg;
+       int nreq;
+       int i;
+       u8 next_fence = 0;
+       u8 opmod = 0;
+       u8 fence;
+
+       spin_lock_irqsave(&qp->sq.lock, flags);
+
+       for (nreq = 0; wr; nreq++, wr = wr->next) {
+               if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+                       mlx5_ib_warn(dev, "\n");
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       goto out;
+               }
+
+               if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+                       mlx5_ib_warn(dev, "\n");
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       goto out;
+               }
+
+               fence = qp->fm_cache;
+               num_sge = wr->num_sge;
+               if (unlikely(num_sge > qp->sq.max_gs)) {
+                       mlx5_ib_warn(dev, "\n");
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       goto out;
+               }
+
+               /* the WQE starts with the control segment */
+               idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+               seg = mlx5_get_send_wqe(qp, idx);
+               ctrl = seg;
+               *(uint32_t *)(seg + 8) = 0;
+               ctrl->imm = send_ieth(wr);
+               ctrl->fm_ce_se = qp->sq_signal_bits |
+                       (wr->send_flags & IB_SEND_SIGNALED ?
+                        MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+                       (wr->send_flags & IB_SEND_SOLICITED ?
+                        MLX5_WQE_CTRL_SOLICITED : 0);
+
+               seg += sizeof(*ctrl);
+               size = sizeof(*ctrl) / 16;
+
+               /* segments that depend on the QP type and the opcode */
+               switch (ibqp->qp_type) {
+               case IB_QPT_XRC_INI:
+                       xrc = seg;
+                       xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
+                       seg += sizeof(*xrc);
+                       size += sizeof(*xrc) / 16;
+                       /* fall through */
+               case IB_QPT_RC:
+                       switch (wr->opcode) {
+                       case IB_WR_RDMA_READ:
+                       case IB_WR_RDMA_WRITE:
+                       case IB_WR_RDMA_WRITE_WITH_IMM:
+                               set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
+                               size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+                               break;
+
+                       case IB_WR_ATOMIC_CMP_AND_SWP:
+                       case IB_WR_ATOMIC_FETCH_AND_ADD:
+                               set_raddr_seg(seg, wr->wr.atomic.remote_addr,
+                                             wr->wr.atomic.rkey);
+                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
+
+                               set_atomic_seg(seg, wr);
+                               seg  += sizeof(struct mlx5_wqe_atomic_seg);
+
+                               size += (sizeof(struct mlx5_wqe_raddr_seg) +
+                                        sizeof(struct mlx5_wqe_atomic_seg)) / 16;
+                               break;
+
+                       case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+                               set_raddr_seg(seg, wr->wr.atomic.remote_addr,
+                                             wr->wr.atomic.rkey);
+                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
+
+                               set_masked_atomic_seg(seg, wr);
+                               seg  += sizeof(struct mlx5_wqe_masked_atomic_seg);
+
+                               size += (sizeof(struct mlx5_wqe_raddr_seg) +
+                                        sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
+                               break;
+
+                       case IB_WR_LOCAL_INV:
+                               next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+                               qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+                               ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
+                               err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+                               num_sge = 0;
+                               break;
+
+                       case IB_WR_FAST_REG_MR:
+                               next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+                               qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
+                               ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+                               err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+                               num_sge = 0;
+                               break;
+
+                       default:
+                               break;
+                       }
+                       break;
+
+               case IB_QPT_UC:
+                       switch (wr->opcode) {
+                       case IB_WR_RDMA_WRITE:
+                       case IB_WR_RDMA_WRITE_WITH_IMM:
+                               set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
+                               size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+                               break;
+
+                       default:
+                               break;
+                       }
+                       break;
+
+               case IB_QPT_UD:
+               case IB_QPT_SMI:
+               case IB_QPT_GSI:
+                       set_datagram_seg(seg, wr);
+                       seg  += sizeof(struct mlx5_wqe_datagram_seg);
+                       size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+                       if (unlikely((seg == qend)))
+                               seg = mlx5_get_send_wqe(qp, 0);
+                       break;
+
+               case MLX5_IB_QPT_REG_UMR:
+                       if (wr->opcode != MLX5_IB_WR_UMR) {
+                               err = -EINVAL;
+                               mlx5_ib_warn(dev, "bad opcode\n");
+                               /* report the failing request like every other error path */
+                               *bad_wr = wr;
+                               goto out;
+                       }
+                       qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+                       ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+                       set_reg_umr_segment(seg, wr);
+                       seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+                       size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+                       if (unlikely((seg == qend)))
+                               seg = mlx5_get_send_wqe(qp, 0);
+                       set_reg_mkey_segment(seg, wr);
+                       seg += sizeof(struct mlx5_mkey_seg);
+                       size += sizeof(struct mlx5_mkey_seg) / 16;
+                       if (unlikely((seg == qend)))
+                               seg = mlx5_get_send_wqe(qp, 0);
+                       break;
+
+               default:
+                       break;
+               }
+
+               /* payload: copied inline or referenced by data pointer segments */
+               if (wr->send_flags & IB_SEND_INLINE && num_sge) {
+                       int uninitialized_var(sz);
+
+                       err = set_data_inl_seg(qp, wr, seg, &sz);
+                       if (unlikely(err)) {
+                               mlx5_ib_warn(dev, "\n");
+                               *bad_wr = wr;
+                               goto out;
+                       }
+                       inl = 1;
+                       size += sz;
+               } else {
+                       dpseg = seg;
+                       for (i = 0; i < num_sge; i++) {
+                               if (unlikely(dpseg == qend)) {
+                                       seg = mlx5_get_send_wqe(qp, 0);
+                                       dpseg = seg;
+                               }
+                               /* zero-length entries do not get a segment */
+                               if (likely(wr->sg_list[i].length)) {
+                                       set_data_ptr_seg(dpseg, wr->sg_list + i);
+                                       size += sizeof(struct mlx5_wqe_data_seg) / 16;
+                                       dpseg++;
+                               }
+                       }
+               }
+
+               mlx5_opcode = mlx5_ib_opcode[wr->opcode];
+               ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8)      |
+                                                    mlx5_opcode                        |
+                                                    ((u32)opmod << 24));
+               ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+               ctrl->fm_ce_se |= get_fence(fence, wr);
+               qp->fm_cache = next_fence;
+               if (unlikely(qp->wq_sig))
+                       ctrl->signature = wq_sig(ctrl);
+
+               /* per-WQE bookkeeping, indexed by the WQE's queue slot */
+               qp->sq.wrid[idx] = wr->wr_id;
+               qp->sq.w_list[idx].opcode = mlx5_opcode;
+               qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+               qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+               qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+               if (0)
+                       dump_wqe(qp, idx, size);
+       }
+
+out:
+       if (likely(nreq)) {
+               qp->sq.head += nreq;
+
+               /* Make sure that descriptors are written before
+                * updating doorbell record and ringing the doorbell
+                */
+               wmb();
+
+               qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+               if (bf->need_lock)
+                       spin_lock(&bf->lock);
+
+               /* TBD enable WC */
+               if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
+                       mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
+                       /* wc_wmb(); */
+               } else {
+                       mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
+                                    MLX5_GET_DOORBELL_LOCK(&bf->lock32));
+                       /* Make sure doorbells don't leak out of SQ spinlock
+                        * and reach the HCA out of order.
+                        */
+                       mmiowb();
+               }
+               bf->offset ^= bf->buf_size;
+               if (bf->need_lock)
+                       spin_unlock(&bf->lock);
+       }
+
+       spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+       return err;
+}
+
+/* Store the calc_sig() checksum of @size bytes at @sig in sig->signature. */
+static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
+{
+       u8 csum = calc_sig(sig, size);
+
+       sig->signature = csum;
+}
+
+/*
+ * Post a chain of receive work requests on @ibqp.
+ *
+ * Runs under the receive queue lock.  Every request is written as a list
+ * of data pointer segments into the next receive WQE; a list shorter
+ * than rq.max_gs is terminated by an entry carrying MLX5_INVALID_LKEY.
+ * When at least one request was consumed from the chain, rq.head is
+ * advanced and the doorbell record is updated behind a write barrier.
+ *
+ * Returns 0, -ENOMEM when the queue is full or -EINVAL when a request
+ * has more than rq.max_gs scatter entries; on failure *@bad_wr points at
+ * the offending request.
+ */
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+                     struct ib_recv_wr **bad_wr)
+{
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_wqe_data_seg *dseg;
+       struct mlx5_rwqe_sig *sig;
+       unsigned long flags;
+       int err = 0;
+       int nreq = 0;
+       int slot;
+       int i;
+
+       spin_lock_irqsave(&qp->rq.lock, flags);
+
+       slot = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+       while (wr) {
+               if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       goto out;
+               }
+
+               if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       goto out;
+               }
+
+               dseg = get_recv_wqe(qp, slot);
+               /* with signatures enabled the first entry is skipped */
+               if (qp->wq_sig)
+                       dseg++;
+
+               for (i = 0; i < wr->num_sge; i++)
+                       set_data_ptr_seg(dseg + i, wr->sg_list + i);
+
+               if (i < qp->rq.max_gs) {
+                       dseg[i].addr       = 0;
+                       dseg[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
+                       dseg[i].byte_count = 0;
+               }
+
+               if (qp->wq_sig) {
+                       sig = (struct mlx5_rwqe_sig *)dseg;
+                       set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
+               }
+
+               qp->rq.wrid[slot] = wr->wr_id;
+
+               slot = (slot + 1) & (qp->rq.wqe_cnt - 1);
+
+               nreq++;
+               wr = wr->next;
+       }
+
+out:
+       if (likely(nreq)) {
+               qp->rq.head += nreq;
+
+               /* Make sure that descriptors are written before
+                * doorbell record.
+                */
+               wmb();
+
+               *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+       }
+
+       spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+       return err;
+}
+
+/*
+ * Map an mlx5 QP state to the matching IB verbs state.  Both SQD and
+ * SQ_DRAINING map to IB_QPS_SQD; an unknown state yields -1.
+ */
+static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
+{
+       if (mlx5_state == MLX5_QP_STATE_RST)
+               return IB_QPS_RESET;
+       if (mlx5_state == MLX5_QP_STATE_INIT)
+               return IB_QPS_INIT;
+       if (mlx5_state == MLX5_QP_STATE_RTR)
+               return IB_QPS_RTR;
+       if (mlx5_state == MLX5_QP_STATE_RTS)
+               return IB_QPS_RTS;
+       if (mlx5_state == MLX5_QP_STATE_SQ_DRAINING ||
+           mlx5_state == MLX5_QP_STATE_SQD)
+               return IB_QPS_SQD;
+       if (mlx5_state == MLX5_QP_STATE_SQER)
+               return IB_QPS_SQE;
+       if (mlx5_state == MLX5_QP_STATE_ERR)
+               return IB_QPS_ERR;
+
+       return -1;
+}
+
+/*
+ * Map an mlx5 path migration state to the IB verbs value; an unknown
+ * state yields -1.
+ */
+static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
+{
+       if (mlx5_mig_state == MLX5_QP_PM_ARMED)
+               return IB_MIG_ARMED;
+       if (mlx5_mig_state == MLX5_QP_PM_REARM)
+               return IB_MIG_REARM;
+       if (mlx5_mig_state == MLX5_QP_PM_MIGRATED)
+               return IB_MIG_MIGRATED;
+
+       return -1;
+}
+
+/*
+ * Translate the RRE/RWE/RAE bits of an mlx5 QP context word into
+ * IB_ACCESS_REMOTE_{READ,WRITE,ATOMIC} flags.
+ */
+static int to_ib_qp_access_flags(int mlx5_flags)
+{
+       return (mlx5_flags & MLX5_QP_BIT_RRE ? IB_ACCESS_REMOTE_READ   : 0) |
+              (mlx5_flags & MLX5_QP_BIT_RWE ? IB_ACCESS_REMOTE_WRITE  : 0) |
+              (mlx5_flags & MLX5_QP_BIT_RAE ? IB_ACCESS_REMOTE_ATOMIC : 0);
+}
+
+/*
+ * Decode a device QP path into an ib_ah_attr.
+ *
+ * The attribute is zeroed first.  If the path's port number is 0 or
+ * above the device's port count, only port_num is filled in.  The GRH
+ * fields are decoded only when bit 7 of grh_mlid is set.
+ */
+static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
+                               struct mlx5_qp_path *path)
+{
+       memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
+       ib_ah_attr->port_num = path->port;
+
+       if (ib_ah_attr->port_num == 0 ||
+           ib_ah_attr->port_num > ibdev->mdev.caps.num_ports)
+               return;
+
+       ib_ah_attr->sl            = path->sl & 0xf;
+       ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
+       ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
+       ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
+       ib_ah_attr->ah_flags      = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
+
+       if (!ib_ah_attr->ah_flags)
+               return;
+
+       ib_ah_attr->grh.sgid_index = path->mgid_index;
+       ib_ah_attr->grh.hop_limit  = path->hop_limit;
+       /* traffic class sits above the 20-bit flow label in the same word */
+       ib_ah_attr->grh.traffic_class =
+               (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+       ib_ah_attr->grh.flow_label =
+               be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+       memcpy(ib_ah_attr->grh.dgid.raw,
+              path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+}
+
+/*
+ * Query the device for the current context of @ibqp and decode it into
+ * @qp_attr and @qp_init_attr.
+ *
+ * The query runs under qp->mutex and also refreshes the cached
+ * qp->state.  @qp_attr_mask is not used.  Send queue capabilities are
+ * reported as zero for QPs that have a user object, and max_inline_data
+ * is always reported as zero.
+ *
+ * Returns 0, -ENOMEM if the query mailbox cannot be allocated, or the
+ * error returned by mlx5_core_qp_query().
+ */
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+                    struct ib_qp_init_attr *qp_init_attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_query_qp_mbox_out *outb;
+       struct mlx5_qp_context *context;
+       u32 ctx_flags;
+       u32 params1;
+       u32 params2;
+       u32 rnr_psn;
+       int mlx5_state;
+       int err = 0;
+
+       mutex_lock(&qp->mutex);
+       outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+       if (!outb) {
+               err = -ENOMEM;
+               goto out;
+       }
+       context = &outb->ctx;
+       err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
+       if (err)
+               goto out_free;
+
+       /* convert the packed context words to host order once */
+       ctx_flags = be32_to_cpu(context->flags);
+       params1   = be32_to_cpu(context->params1);
+       params2   = be32_to_cpu(context->params2);
+       rnr_psn   = be32_to_cpu(context->rnr_nextrecvpsn);
+
+       mlx5_state = ctx_flags >> 28;
+
+       qp->state                = to_ib_qp_state(mlx5_state);
+       qp_attr->qp_state        = qp->state;
+       qp_attr->path_mtu        = context->mtu_msgmax >> 5;
+       qp_attr->path_mig_state  = to_ib_mig_state((ctx_flags >> 11) & 0x3);
+       qp_attr->qkey            = be32_to_cpu(context->qkey);
+       qp_attr->rq_psn          = rnr_psn & 0xffffff;
+       qp_attr->sq_psn          = be32_to_cpu(context->next_send_psn) & 0xffffff;
+       qp_attr->dest_qp_num     = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
+       qp_attr->qp_access_flags = to_ib_qp_access_flags(params2);
+
+       /* address vectors are decoded for RC and UC QPs only */
+       if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+               to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+               to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+               qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
+               qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
+       }
+
+       qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
+       qp_attr->port_num = context->pri_path.port;
+
+       /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+       qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
+
+       /* both rd_atomic limits are stored as a log2 value */
+       qp_attr->max_rd_atomic      = 1 << ((params1 >> 21) & 0x7);
+       qp_attr->max_dest_rd_atomic = 1 << ((params2 >> 21) & 0x7);
+       qp_attr->min_rnr_timer      = (rnr_psn >> 24) & 0x1f;
+       qp_attr->timeout            = context->pri_path.ackto_lt >> 3;
+       qp_attr->retry_cnt          = (params1 >> 16) & 0x7;
+       qp_attr->rnr_retry          = (params1 >> 13) & 0x7;
+       qp_attr->alt_timeout        = context->alt_path.ackto_lt >> 3;
+       qp_attr->cur_qp_state       = qp_attr->qp_state;
+       qp_attr->cap.max_recv_wr    = qp->rq.wqe_cnt;
+       qp_attr->cap.max_recv_sge   = qp->rq.max_gs;
+
+       if (ibqp->uobject) {
+               qp_attr->cap.max_send_wr  = 0;
+               qp_attr->cap.max_send_sge = 0;
+       } else {
+               qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
+               qp_attr->cap.max_send_sge = qp->sq.max_gs;
+       }
+
+       /* We don't support inline sends for kernel QPs (yet), and we
+        * don't know what userspace's value should be.
+        */
+       qp_attr->cap.max_inline_data = 0;
+
+       qp_init_attr->cap = qp_attr->cap;
+
+       qp_init_attr->create_flags = 0;
+       if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+               qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+       if (qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE)
+               qp_init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+       else
+               qp_init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+
+out_free:
+       kfree(outb);
+
+out:
+       mutex_unlock(&qp->mutex);
+       return err;
+}
+
+/*
+ * Allocate an XRC domain on @ibdev.  @context and @udata are not used.
+ *
+ * Returns the new ib_xrcd, or an ERR_PTR(): -ENOSYS when the device does
+ * not advertise MLX5_DEV_CAP_FLAG_XRC, -ENOMEM when the object cannot be
+ * allocated, or the error reported by mlx5_core_xrcd_alloc().
+ */
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+                                         struct ib_ucontext *context,
+                                         struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_xrcd *xrcd;
+       int err;
+
+       if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+               return ERR_PTR(-ENOSYS);
+
+       xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
+       if (!xrcd)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
+       if (err) {
+               kfree(xrcd);
+               /* hand the real failure reason to the caller */
+               return ERR_PTR(err);
+       }
+
+       return &xrcd->ibxrcd;
+}
+
+/*
+ * Release an XRC domain.  The object is freed only after the device has
+ * released the domain; if that fails, a warning is logged, the object is
+ * kept and the error is returned.
+ */
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+       struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
+       u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+       int err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
+
+       if (!err) {
+               kfree(xrcd);
+               return 0;
+       }
+
+       mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
+       return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644 (file)
index 0000000..84d297a
--- /dev/null
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+
+#include "mlx5_ib.h"
+#include "user.h"
+
+/* Selects WQE signatures for kernel SRQs: create_srq_kernel() copies its
+ * truth value into srq->wq_sig.  Not supported currently; nothing in this
+ * file assigns it, so it keeps its zero initial value.
+ */
+static int srq_signature;
+
+/* Return the address of work-queue entry @n in the SRQ buffer; entries
+ * are spaced 1 << wqe_shift bytes apart.
+ */
+static void *get_wqe(struct mlx5_ib_srq *srq, int n)
+{
+       int byte_off = n << srq->msrq.wqe_shift;
+
+       return mlx5_buf_offset(&srq->buf, byte_off);
+}
+
+/* Translate a core SRQ event into the matching IB event and deliver it
+ * to the consumer's handler, if one is registered.  Unknown event types
+ * are logged and dropped.
+ */
+static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
+{
+       struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
+       struct ib_event event;
+
+       /* Nobody asked to be notified. */
+       if (!ibsrq->event_handler)
+               return;
+
+       event.device      = ibsrq->device;
+       event.element.srq = ibsrq;
+
+       switch (type) {
+       case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+               event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+               break;
+       case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+               event.event = IB_EVENT_SRQ_ERR;
+               break;
+       default:
+               pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
+                       type, srq->srqn);
+               return;
+       }
+
+       ibsrq->event_handler(&event, ibsrq->srq_context);
+}
+
+/* Set up the user-space backed parts of an SRQ.
+ *
+ * Reads a struct mlx5_ib_create_srq from @udata, obtains a umem for the
+ * user buffer of @buf_size bytes, allocates the create-SRQ mailbox and
+ * fills its page-address array, and maps the user doorbell record.
+ *
+ * On success returns 0 with *in pointing at the mailbox (the caller frees
+ * it with mlx5_vfree()) and *inlen holding its size.  On failure returns
+ * a negative errno; the mailbox and umem obtained here are released.
+ */
+static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+                          struct mlx5_create_srq_mbox_in **in,
+                          struct ib_udata *udata, int buf_size, int *inlen)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_create_srq ucmd;
+       int err;
+       int npages;
+       int page_shift;
+       int ncont;
+       u32 offset;
+
+       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+               mlx5_ib_dbg(dev, "failed copy udata\n");
+               return -EFAULT;
+       }
+       /* User space chooses whether WQE signatures are used. */
+       srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
+
+       srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
+                               0, 0);
+       if (IS_ERR(srq->umem)) {
+               mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
+               err = PTR_ERR(srq->umem);
+               return err;
+       }
+
+       /* ncont sizes the page-address array below; page_shift is reported
+        * to the device as log_pg_sz.
+        */
+       mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+                          &page_shift, &ncont, NULL);
+       err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
+                                    &offset);
+       if (err) {
+               mlx5_ib_warn(dev, "bad offset\n");
+               goto err_umem;
+       }
+
+       /* Mailbox header followed by one page-address entry per ncont. */
+       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+       *in = mlx5_vzalloc(*inlen);
+       if (!(*in)) {
+               err = -ENOMEM;
+               goto err_umem;
+       }
+
+       mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
+
+       err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+                                 ucmd.db_addr, &srq->db);
+       if (err) {
+               mlx5_ib_dbg(dev, "map doorbell failed\n");
+               goto err_in;
+       }
+
+       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       /* The buffer offset sits above bit 25; mlx5_ib_create_srq() ORs the
+        * CQ number into the low bits afterwards.
+        */
+       (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+
+       return 0;
+
+err_in:
+       mlx5_vfree(*in);
+
+err_umem:
+       ib_umem_release(srq->umem);
+
+       return err;
+}
+
+/* Set up the kernel-owned parts of an SRQ.
+ *
+ * Allocates and clears a doorbell record, allocates the WQE buffer of
+ * @buf_size bytes, chains every WQE to its successor to form the initial
+ * free list, builds the create-SRQ mailbox with the buffer's page
+ * addresses, and allocates the wr_id array.
+ *
+ * On success returns 0 with *in pointing at the mailbox (the caller frees
+ * it with mlx5_vfree()) and *inlen holding its size.  On failure returns
+ * a negative errno and releases everything allocated here.
+ */
+static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+                            struct mlx5_create_srq_mbox_in **in, int buf_size,
+                            int *inlen)
+{
+       int err;
+       int i;
+       struct mlx5_wqe_srq_next_seg *next;
+       int page_shift;
+       int npages;
+
+       err = mlx5_db_alloc(&dev->mdev, &srq->db);
+       if (err) {
+               mlx5_ib_warn(dev, "alloc dbell rec failed\n");
+               return err;
+       }
+
+       *srq->db.db = 0;
+
+       if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+               mlx5_ib_dbg(dev, "buf alloc failed\n");
+               err = -ENOMEM;
+               goto err_db;
+       }
+       page_shift = srq->buf.page_shift;
+
+       srq->head    = 0;
+       srq->tail    = srq->msrq.max - 1;
+       srq->wqe_ctr = 0;
+
+       /* Link WQE i to WQE i + 1, wrapping at max (a power of two). */
+       for (i = 0; i < srq->msrq.max; i++) {
+               next = get_wqe(srq, i);
+               next->next_wqe_index =
+                       cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+       }
+
+       npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
+       mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
+                   buf_size, page_shift, srq->buf.npages, npages);
+       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
+       *in = mlx5_vzalloc(*inlen);
+       if (!*in) {
+               err = -ENOMEM;
+               goto err_buf;
+       }
+       mlx5_fill_page_array(&srq->buf, (*in)->pas);
+
+       /* kmalloc_array() rejects a count * size product that overflows. */
+       srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
+       if (!srq->wrid) {
+               mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
+                           (unsigned long)(srq->msrq.max * sizeof(u64)));
+               err = -ENOMEM;
+               goto err_in;
+       }
+       srq->wq_sig = !!srq_signature;
+
+       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+
+       return 0;
+
+err_in:
+       mlx5_vfree(*in);
+
+err_buf:
+       mlx5_buf_free(&dev->mdev, &srq->buf);
+
+err_db:
+       mlx5_db_free(&dev->mdev, &srq->db);
+       return err;
+}
+
+/* Undo create_srq_user(): unmap the user doorbell record and release the
+ * umem of the user buffer.
+ */
+static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
+{
+       mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+       ib_umem_release(srq->umem);
+}
+
+
+/* Undo create_srq_kernel(): free the wr_id array, the WQE buffer and the
+ * doorbell record.
+ */
+static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
+{
+       kfree(srq->wrid);
+       mlx5_buf_free(&dev->mdev, &srq->buf);
+       mlx5_db_free(&dev->mdev, &srq->db);
+}
+
+/* Create a shared receive queue on @pd.
+ *
+ * The queue depth is init_attr->attr.max_wr + 1 rounded up to a power of
+ * two.  Each WQE holds a next-segment header followed by max_sge data
+ * segments, rounded up to a power of two with a 32-byte minimum.  The
+ * buffer and doorbell come from create_srq_user() when @pd has a user
+ * object and from create_srq_kernel() otherwise.
+ *
+ * On success returns the new ib_srq and writes the usable depth
+ * (srq->msrq.max - 1) back to init_attr->attr.max_wr; for user SRQs the
+ * SRQ number is also copied to @udata.  On failure returns an ERR_PTR()
+ * after releasing everything that had been set up.
+ */
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+                                 struct ib_srq_init_attr *init_attr,
+                                 struct ib_udata *udata)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_srq *srq;
+       int desc_size;
+       int buf_size;
+       int err;
+       struct mlx5_create_srq_mbox_in *uninitialized_var(in);
+       int uninitialized_var(inlen);
+       int is_xrc;
+       u32 flgs, xrcdn;
+
+       /* Sanity check SRQ size before proceeding */
+       if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
+               mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
+                           init_attr->attr.max_wr,
+                           dev->mdev.caps.max_srq_wqes);
+               return ERR_PTR(-EINVAL);
+       }
+
+       srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+       if (!srq)
+               return ERR_PTR(-ENOMEM);
+
+       mutex_init(&srq->mutex);
+       spin_lock_init(&srq->lock);
+       srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+       srq->msrq.max_gs = init_attr->attr.max_sge;
+
+       desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
+                   srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+       desc_size = roundup_pow_of_two(desc_size);
+       desc_size = max_t(int, 32, desc_size);
+       /* Rounding up may leave room for more data segments than asked. */
+       srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
+               sizeof(struct mlx5_wqe_data_seg);
+       srq->msrq.wqe_shift = ilog2(desc_size);
+       buf_size = srq->msrq.max * desc_size;
+       mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
+                   desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
+                   srq->msrq.max_avail_gather);
+
+       if (pd->uobject)
+               err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+       else
+               err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+
+       if (err) {
+               mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
+                            pd->uobject ? "user" : "kernel", err);
+               goto err_srq;
+       }
+
+       is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+       in->ctx.state_log_sz = ilog2(srq->msrq.max);
+       flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
+       xrcdn = 0;
+       if (is_xrc) {
+               xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
+               in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
+       } else if (init_attr->srq_type == IB_SRQT_BASIC) {
+               /* Basic SRQs use the device's own XRCD (x0) and CQ (c0). */
+               xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
+               in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
+       }
+
+       in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
+
+       in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
+       in->ctx.db_record = cpu_to_be64(srq->db.dma);
+       err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
+       mlx5_vfree(in);
+       if (err) {
+               mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
+               /* The buffer and doorbell already exist at this point;
+                * they must be torn down too, not just the srq struct.
+                */
+               goto err_usr_kern_srq;
+       }
+
+       mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
+
+       srq->msrq.event = mlx5_ib_srq_event;
+       srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
+
+       if (pd->uobject) {
+               if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+                       mlx5_ib_dbg(dev, "copy to user failed\n");
+                       err = -EFAULT;
+                       goto err_core;
+               }
+       }
+
+       init_attr->attr.max_wr = srq->msrq.max - 1;
+
+       return &srq->ibsrq;
+
+err_core:
+       mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+
+err_usr_kern_srq:
+       if (pd->uobject)
+               destroy_srq_user(pd, srq);
+       else
+               destroy_srq_kernel(dev, srq);
+
+err_srq:
+       kfree(srq);
+
+       return ERR_PTR(err);
+}
+
+/* Modify an SRQ.  Only the limit (IB_SRQ_LIMIT) can be changed; a resize
+ * request (IB_SRQ_MAX_WR) or a limit at or above the queue size is
+ * rejected with -EINVAL.  Returns 0 or the error from arming the SRQ.
+ */
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+                      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+       struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+       struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+       int ret;
+
+       /* Resizing is not implemented yet. */
+       if (attr_mask & IB_SRQ_MAX_WR)
+               return -EINVAL;
+
+       /* Nothing else to do unless a new limit was supplied. */
+       if (!(attr_mask & IB_SRQ_LIMIT))
+               return 0;
+
+       if (attr->srq_limit >= srq->msrq.max)
+               return -EINVAL;
+
+       mutex_lock(&srq->mutex);
+       ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
+       mutex_unlock(&srq->mutex);
+
+       return ret;
+}
+
+/* Query an SRQ: the limit comes from the device's SRQ context, max_wr
+ * and max_sge from the values cached at creation.  Returns 0, -ENOMEM or
+ * the error from mlx5_core_query_srq(); @srq_attr is only written on
+ * success.
+ */
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+       struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+       struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+       struct mlx5_query_srq_mbox_out *out;
+       int ret;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return -ENOMEM;
+
+       ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
+       if (!ret) {
+               srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
+               srq_attr->max_wr    = srq->msrq.max - 1;
+               srq_attr->max_sge   = srq->msrq.max_gs;
+       }
+
+       kfree(out);
+       return ret;
+}
+
+/* Destroy an SRQ: tear down the device object, release the user or
+ * kernel queue resources, then free the driver structure.  Always
+ * returns 0.
+ */
+int mlx5_ib_destroy_srq(struct ib_srq *srq)
+{
+       struct mlx5_ib_dev *dev = to_mdev(srq->device);
+       struct mlx5_ib_srq *msrq = to_msrq(srq);
+
+       mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
+
+       if (srq->uobject) {
+               mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+               ib_umem_release(msrq->umem);
+       } else {
+               kfree(msrq->wrid);
+               mlx5_buf_free(&dev->mdev, &msrq->buf);
+               mlx5_db_free(&dev->mdev, &msrq->db);
+       }
+
+       /* Free the containing object that mlx5_ib_create_srq() allocated,
+        * not the ib_srq embedded in it, so this does not depend on the
+        * member's offset.
+        */
+       kfree(msrq);
+       return 0;
+}
+
+/* Give WQE @wqe_index back to the SRQ free list: link it after the
+ * current tail entry and make it the new tail.
+ */
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
+{
+       struct mlx5_wqe_srq_next_seg *tail_seg;
+
+       /* always called with interrupts disabled. */
+       spin_lock(&srq->lock);
+
+       tail_seg = get_wqe(srq, srq->tail);
+       tail_seg->next_wqe_index = cpu_to_be16(wqe_index);
+       srq->tail = wqe_index;
+
+       spin_unlock(&srq->lock);
+}
+
+/* Post a chain of receive work requests to an SRQ.
+ *
+ * Each request takes the WQE at the head of the free list, records its
+ * wr_id and fills in one data segment per SGE.  Processing stops at the
+ * first request that has more SGEs than max_gs (-EINVAL) or finds the
+ * free list exhausted (-ENOMEM); *bad_wr then points at that request.
+ * Requests accepted before the failure stay posted.  Returns 0 when the
+ * whole chain was posted.
+ */
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+                         struct ib_recv_wr **bad_wr)
+{
+       struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+       struct mlx5_wqe_srq_next_seg *next;
+       struct mlx5_wqe_data_seg *scat;
+       unsigned long flags;
+       int err = 0;
+       int nreq;
+       int i;
+
+       spin_lock_irqsave(&srq->lock, flags);
+
+       for (nreq = 0; wr; nreq++, wr = wr->next) {
+               if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               /* head catching up with tail means no free WQE is left. */
+               if (unlikely(srq->head == srq->tail)) {
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
+               srq->wrid[srq->head] = wr->wr_id;
+
+               /* Advance head along the free list; the data segments
+                * start right after the next-segment header.
+                */
+               next      = get_wqe(srq, srq->head);
+               srq->head = be16_to_cpu(next->next_wqe_index);
+               scat      = (struct mlx5_wqe_data_seg *)(next + 1);
+
+               for (i = 0; i < wr->num_sge; i++) {
+                       scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+                       scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
+                       scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
+               }
+
+               /* If the WQE has room left, end the scatter list with a
+                * zero-length entry carrying the invalid lkey.
+                */
+               if (i < srq->msrq.max_avail_gather) {
+                       scat[i].byte_count = 0;
+                       scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
+                       scat[i].addr       = 0;
+               }
+       }
+
+       /* Publish whatever was accepted, even if the loop ended early. */
+       if (likely(nreq)) {
+               srq->wqe_ctr += nreq;
+
+               /* Make sure that descriptors are written before
+                * doorbell record.
+                */
+               wmb();
+
+               *srq->db.db = cpu_to_be32(srq->wqe_ctr);
+       }
+
+       spin_unlock_irqrestore(&srq->lock, flags);
+
+       return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644 (file)
index 0000000..a886de3
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_USER_H
+#define MLX5_IB_USER_H
+
+#include <linux/types.h>
+
+/* QP creation flags; presumably carried in mlx5_ib_create_qp.flags
+ * (the consumer is not part of this header -- confirm).
+ */
+enum {
+       MLX5_QP_FLAG_SIGNATURE          = 1 << 0,
+       MLX5_QP_FLAG_SCATTER_CQE        = 1 << 1,
+};
+
+/* Flags carried in mlx5_ib_create_srq.flags. */
+enum {
+       MLX5_SRQ_FLAG_SIGNATURE         = 1 << 0,
+};
+
+
+/* Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX5_IB_UVERBS_ABI_VERSION     1
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+/* Request payload for user-context allocation. */
+struct mlx5_ib_alloc_ucontext_req {
+       __u32   total_num_uuars;
+       __u32   num_low_latency_uuars;
+};
+
+/* Response payload for user-context allocation. */
+struct mlx5_ib_alloc_ucontext_resp {
+       __u32   qp_tab_size;
+       __u32   bf_reg_size;
+       __u32   tot_uuars;
+       __u32   cache_line_size;
+       __u16   max_sq_desc_sz;
+       __u16   max_rq_desc_sz;
+       __u32   max_send_wqebb;
+       __u32   max_recv_wr;
+       __u32   max_srq_recv_wr;
+       __u16   num_ports;
+       __u16   reserved;
+};
+
+/* Response payload for PD allocation. */
+struct mlx5_ib_alloc_pd_resp {
+       __u32   pdn;
+};
+
+/* Request payload for CQ creation.
+ *
+ * NOTE(review): two __u64 fields followed by a single __u32 leave tail
+ * padding on ABIs that align __u64 to 8 bytes but not on ones that align
+ * it to 4, so sizeof() may differ between 32-bit and 64-bit builds --
+ * confirm this is handled where the struct is copied from user space.
+ */
+struct mlx5_ib_create_cq {
+       __u64   buf_addr;
+       __u64   db_addr;
+       __u32   cqe_size;
+};
+
+/* Response payload for CQ creation. */
+struct mlx5_ib_create_cq_resp {
+       __u32   cqn;
+       __u32   reserved;
+};
+
+/* Request payload for CQ resize. */
+struct mlx5_ib_resize_cq {
+       __u64   buf_addr;
+};
+
+/* Request payload for SRQ creation: buf_addr is the user address of the
+ * WQE buffer, db_addr the user address of the doorbell record, and flags
+ * a mask of MLX5_SRQ_FLAG_* bits.
+ *
+ * NOTE(review): same tail-padding concern as mlx5_ib_create_cq -- confirm.
+ */
+struct mlx5_ib_create_srq {
+       __u64   buf_addr;
+       __u64   db_addr;
+       __u32   flags;
+};
+
+/* Response payload for SRQ creation; srqn is the SRQ number. */
+struct mlx5_ib_create_srq_resp {
+       __u32   srqn;
+       __u32   reserved;
+};
+
+/* Request payload for QP creation. */
+struct mlx5_ib_create_qp {
+       __u64   buf_addr;
+       __u64   db_addr;
+       __u32   sq_wqe_count;
+       __u32   rq_wqe_count;
+       __u32   rq_wqe_shift;
+       __u32   flags;
+};
+
+/* Response payload for QP creation. */
+struct mlx5_ib_create_qp_resp {
+       __u32   uuar_index;
+};
+#endif /* MLX5_IB_USER_H */
index bcdbc14..8cf7563 100644 (file)
@@ -19,5 +19,6 @@ config NET_VENDOR_MELLANOX
 if NET_VENDOR_MELLANOX
 
 source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
+source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
 
 endif # NET_VENDOR_MELLANOX
index 37afb96..38fe32e 100644 (file)
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_MLX4_CORE) += mlx4/
+obj-$(CONFIG_MLX5_CORE) += mlx5/core/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644 (file)
index 0000000..2196282
--- /dev/null
@@ -0,0 +1,18 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX5_CORE
+       tristate
+       depends on PCI && X86
+       default n
+
+config MLX5_DEBUG
+       bool "Verbose debugging output" if (MLX5_CORE && EXPERT)
+       depends on MLX5_CORE
+       default y
+       ---help---
+         This option causes debugging code to be compiled into the
+         mlx5_core driver.  The output can be turned on via the
+         debug_mask module parameter (which can also be set after
+         the driver is loaded through sysfs).
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644 (file)
index 0000000..105780b
--- /dev/null
@@ -0,0 +1,5 @@
+obj-$(CONFIG_MLX5_CORE)                += mlx5_core.o
+
+mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
+               health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
+               mad.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644 (file)
index 0000000..b215742
--- /dev/null
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.  If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+/* Allocate a DMA-coherent, zeroed queue buffer of @size bytes.
+ *
+ * When @size <= @max_direct one contiguous chunk is allocated and
+ * page_shift/npages are then adjusted to the alignment of the DMA address
+ * actually returned.  Otherwise the buffer is built from individual
+ * PAGE_SIZE chunks recorded in buf->page_list; on 64-bit builds they are
+ * additionally vmap()ed so buf->direct.buf gives a contiguous kernel
+ * view (it stays NULL on 32-bit builds).
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing is left allocated.
+ *
+ * NOTE(review): nothing here registers a memory region, although the
+ * comment preceding this function says so -- confirm where that happens.
+ * NOTE(review): mlx5_buf_free() tells the two layouts apart by
+ * nbufs == 1, which would misfire for a split buffer of a single page
+ * (only possible when @max_direct < @size <= PAGE_SIZE) -- confirm no
+ * caller passes such values.
+ */
+int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
+                  struct mlx5_buf *buf)
+{
+       dma_addr_t t;
+
+       buf->size = size;
+       if (size <= max_direct) {
+               buf->nbufs        = 1;
+               buf->npages       = 1;
+               buf->page_shift   = get_order(size) + PAGE_SHIFT;
+               buf->direct.buf   = dma_zalloc_coherent(&dev->pdev->dev,
+                                                       size, &t, GFP_KERNEL);
+               if (!buf->direct.buf)
+                       return -ENOMEM;
+
+               buf->direct.map = t;
+
+               /* Halve the page size (doubling the page count) until the
+                * DMA address is aligned to it.
+                */
+               while (t & ((1 << buf->page_shift) - 1)) {
+                       --buf->page_shift;
+                       buf->npages *= 2;
+               }
+       } else {
+               int i;
+
+               buf->direct.buf  = NULL;
+               buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+               buf->npages      = buf->nbufs;
+               buf->page_shift  = PAGE_SHIFT;
+               /* Zeroed, so the error path can skip unallocated entries. */
+               buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
+                                          GFP_KERNEL);
+               if (!buf->page_list)
+                       return -ENOMEM;
+
+               for (i = 0; i < buf->nbufs; i++) {
+                       buf->page_list[i].buf =
+                               dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+                                                   &t, GFP_KERNEL);
+                       if (!buf->page_list[i].buf)
+                               goto err_free;
+
+                       buf->page_list[i].map = t;
+               }
+
+               if (BITS_PER_LONG == 64) {
+                       struct page **pages;
+                       /* Temporary array, needed only for the vmap() call. */
+                       pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL);
+                       if (!pages)
+                               goto err_free;
+                       for (i = 0; i < buf->nbufs; i++)
+                               pages[i] = virt_to_page(buf->page_list[i].buf);
+                       buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+                       kfree(pages);
+                       if (!buf->direct.buf)
+                               goto err_free;
+               }
+       }
+
+       return 0;
+
+err_free:
+       mlx5_buf_free(dev, buf);
+
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+
+/* Free a buffer set up by mlx5_buf_alloc().  A buffer with nbufs == 1 is
+ * treated as one contiguous chunk; otherwise the vmap() view (64-bit
+ * builds only) is dropped, every allocated page is freed and the page
+ * list itself is released.
+ */
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+{
+       int idx;
+
+       if (buf->nbufs == 1) {
+               dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
+                                 buf->direct.map);
+               return;
+       }
+
+       if (BITS_PER_LONG == 64 && buf->direct.buf)
+               vunmap(buf->direct.buf);
+
+       for (idx = 0; idx < buf->nbufs; idx++) {
+               /* Entries past a failed allocation were never filled in. */
+               if (!buf->page_list[idx].buf)
+                       continue;
+
+               dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+                                 buf->page_list[idx].buf,
+                                 buf->page_list[idx].map);
+       }
+
+       kfree(buf->page_list);
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_free);
+
+/* Allocate a doorbell page directory: one DMA-coherent page plus a
+ * bitmap in which a set bit marks a free doorbell slot.  Returns NULL if
+ * either allocation fails.
+ */
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device)
+{
+       struct mlx5_db_pgdir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+
+       if (!dir)
+               return NULL;
+
+       dir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+                                         &dir->db_dma, GFP_KERNEL);
+       if (!dir->db_page) {
+               kfree(dir);
+               return NULL;
+       }
+
+       /* Every slot starts out free. */
+       bitmap_fill(dir->bitmap, MLX5_DB_PER_PAGE);
+
+       return dir;
+}
+
+/* Claim the first free doorbell slot of @pgdir for @db.  Slots are
+ * L1_CACHE_BYTES apart within the page.  Returns 0, or -ENOMEM when the
+ * directory has no free slot.
+ */
+static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
+                                   struct mlx5_db *db)
+{
+       int slot = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE);
+       int byte_off;
+
+       if (slot >= MLX5_DB_PER_PAGE)
+               return -ENOMEM;
+
+       /* A cleared bit means the slot is in use. */
+       __clear_bit(slot, pgdir->bitmap);
+
+       db->u.pgdir = pgdir;
+       db->index   = slot;
+
+       byte_off = db->index * L1_CACHE_BYTES;
+       db->db   = pgdir->db_page + byte_off / sizeof(*pgdir->db_page);
+       db->dma  = pgdir->db_dma  + byte_off;
+
+       return 0;
+}
+
+/* Allocate a doorbell record for @db.  Existing page directories are
+ * tried first; if none has a free slot a new directory is allocated and
+ * added to the device's list.  Returns 0 or -ENOMEM.
+ */
+int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+       struct mlx5_db_pgdir *pgdir;
+       int ret = 0;
+
+       mutex_lock(&dev->priv.pgdir_mutex);
+
+       list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) {
+               if (mlx5_alloc_db_from_pgdir(pgdir, db) == 0)
+                       goto unlock;
+       }
+
+       pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev));
+       if (pgdir) {
+               list_add(&pgdir->list, &dev->priv.pgdir_list);
+
+               /* This should never fail -- we just allocated an empty page: */
+               WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
+       } else {
+               ret = -ENOMEM;
+       }
+
+unlock:
+       mutex_unlock(&dev->priv.pgdir_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc);
+
+/* Return @db's slot to its page directory.  Once every slot of the
+ * directory is free again, the directory page and the directory itself
+ * are released.
+ */
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+       struct mlx5_db_pgdir *pgdir;
+
+       mutex_lock(&dev->priv.pgdir_mutex);
+
+       pgdir = db->u.pgdir;
+       __set_bit(db->index, pgdir->bitmap);
+
+       if (bitmap_full(pgdir->bitmap, MLX5_DB_PER_PAGE)) {
+               dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+                                 pgdir->db_page, pgdir->db_dma);
+               list_del(&pgdir->list);
+               kfree(pgdir);
+       }
+
+       mutex_unlock(&dev->priv.pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_free);
+
+
+/* Write the big-endian DMA address of each of @buf's npages pages into
+ * @pas.  For a single-chunk buffer the addresses are consecutive
+ * page-sized steps from the chunk's base; otherwise they come from the
+ * page list.
+ */
+void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+{
+       int idx;
+
+       for (idx = 0; idx < buf->npages; idx++) {
+               u64 addr = (buf->nbufs == 1) ?
+                       buf->direct.map + (idx << buf->page_shift) :
+                       buf->page_list[idx].map;
+
+               pas[idx] = cpu_to_be64(addr);
+       }
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644 (file)
index 0000000..c1c0eef
--- /dev/null
@@ -0,0 +1,1515 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/debugfs.h>
+
+#include "mlx5_core.h"
+
+/*
+ * NOTE(review): presumably the command interface revision this driver
+ * expects from the device; its use is outside this part of the file.
+ */
+enum {
+       CMD_IF_REV = 3,
+};
+
+/*
+ * Values of cmd->mode.  In polling mode the work handler busy-waits on the
+ * descriptor ownership bit itself; in events mode the submitter waits on the
+ * entry's completion with a timeout.
+ */
+enum {
+       CMD_MODE_POLLING,
+       CMD_MODE_EVENTS
+};
+
+/*
+ * Counts and sizes for two classes of message lists ("long" and "med").
+ * LONG_LIST_SIZE is 8 bytes for every PAGE_SIZE page in 2 GiB, plus 16
+ * bytes, plus one command data block; MED_LIST_SIZE is 16 bytes plus one
+ * command data block.
+ * NOTE(review): presumably these size a cache of preallocated messages;
+ * the users are outside this part of the file.
+ */
+enum {
+       NUM_LONG_LISTS    = 2,
+       NUM_MED_LISTS     = 64,
+       LONG_LIST_SIZE    = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
+                               MLX5_CMD_DATA_BLOCK_SIZE,
+       MED_LIST_SIZE     = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
+};
+
+/*
+ * Command delivery status codes.  deliv_status_to_str() translates each of
+ * them into a human readable description.
+ * NOTE(review): the last value is 0x10 rather than 0xa -- confirm against
+ * the device specification.
+ */
+enum {
+       MLX5_CMD_DELIVERY_STAT_OK                       = 0x0,
+       MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR               = 0x1,
+       MLX5_CMD_DELIVERY_STAT_TOK_ERR                  = 0x2,
+       MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR          = 0x3,
+       MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR        = 0x4,
+       MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR         = 0x5,
+       MLX5_CMD_DELIVERY_STAT_FW_ERR                   = 0x6,
+       MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR            = 0x7,
+       MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR           = 0x8,
+       MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR      = 0x9,
+       MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR            = 0x10,
+};
+
+/*
+ * Command status codes.
+ * NOTE(review): presumably the status reported by firmware in a command's
+ * output header; the code that interprets them is outside this part of the
+ * file.  The entries are not listed in numeric order (0x50/0x51 precede
+ * 0x10).
+ */
+enum {
+       MLX5_CMD_STAT_OK                        = 0x0,
+       MLX5_CMD_STAT_INT_ERR                   = 0x1,
+       MLX5_CMD_STAT_BAD_OP_ERR                = 0x2,
+       MLX5_CMD_STAT_BAD_PARAM_ERR             = 0x3,
+       MLX5_CMD_STAT_BAD_SYS_STATE_ERR         = 0x4,
+       MLX5_CMD_STAT_BAD_RES_ERR               = 0x5,
+       MLX5_CMD_STAT_RES_BUSY                  = 0x6,
+       MLX5_CMD_STAT_LIM_ERR                   = 0x8,
+       MLX5_CMD_STAT_BAD_RES_STATE_ERR         = 0x9,
+       MLX5_CMD_STAT_IX_ERR                    = 0xa,
+       MLX5_CMD_STAT_NO_RES_ERR                = 0xf,
+       MLX5_CMD_STAT_BAD_INP_LEN_ERR           = 0x50,
+       MLX5_CMD_STAT_BAD_OUTP_LEN_ERR          = 0x51,
+       MLX5_CMD_STAT_BAD_QP_STATE_ERR          = 0x10,
+       MLX5_CMD_STAT_BAD_PKT_ERR               = 0x30,
+       MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR    = 0x40,
+};
+
+/*
+ * Allocate and fill in a zeroed command work entry.
+ *
+ * A request with a completion callback is allocated with GFP_ATOMIC, a
+ * synchronous one with GFP_KERNEL.  Returns the entry or ERR_PTR(-ENOMEM).
+ */
+static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+                                          struct mlx5_cmd_msg *in,
+                                          struct mlx5_cmd_msg *out,
+                                          mlx5_cmd_cbk_t cbk,
+                                          void *context, int page_queue)
+{
+       struct mlx5_cmd_work_ent *ent;
+       gfp_t gfp;
+
+       if (cbk)
+               gfp = GFP_ATOMIC;
+       else
+               gfp = GFP_KERNEL;
+
+       ent = kzalloc(sizeof(*ent), gfp);
+       if (!ent)
+               return ERR_PTR(-ENOMEM);
+
+       ent->cmd        = cmd;
+       ent->in         = in;
+       ent->out        = out;
+       ent->callback   = cbk;
+       ent->context    = context;
+       ent->page_queue = page_queue;
+
+       return ent;
+}
+
+/*
+ * Hand out the next command token under cmd->token_lock.
+ *
+ * The counter is reduced modulo 255 and offset by one, so (assuming the
+ * counter is non-negative) the returned token is never zero.
+ */
+static u8 alloc_token(struct mlx5_cmd *cmd)
+{
+       u8 tok;
+
+       spin_lock(&cmd->token_lock);
+       tok = cmd->token % 255 + 1;
+       cmd->token++;
+       spin_unlock(&cmd->token_lock);
+
+       return tok;
+}
+
+/*
+ * Claim a free command slot.
+ *
+ * A set bit in cmd->bitmask marks a free slot; the lowest one below
+ * cmd->max_reg_cmds is cleared and its index returned.  Returns -ENOMEM
+ * when no slot is free.
+ */
+static int alloc_ent(struct mlx5_cmd *cmd)
+{
+       unsigned long irq_flags;
+       int idx;
+
+       spin_lock_irqsave(&cmd->alloc_lock, irq_flags);
+       idx = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
+       if (idx >= cmd->max_reg_cmds)
+               idx = -ENOMEM;
+       else
+               clear_bit(idx, &cmd->bitmask);
+       spin_unlock_irqrestore(&cmd->alloc_lock, irq_flags);
+
+       return idx;
+}
+
+/* Release command slot @idx by setting its bit in cmd->bitmask again. */
+static void free_ent(struct mlx5_cmd *cmd, int idx)
+{
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&cmd->alloc_lock, irq_flags);
+       set_bit(idx, &cmd->bitmask);
+       spin_unlock_irqrestore(&cmd->alloc_lock, irq_flags);
+}
+
+/*
+ * Return the command descriptor for slot @idx: descriptors live in
+ * cmd->cmd_buf, (1 << cmd->log_stride) apart.
+ */
+static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+{
+       int slot_off = idx << cmd->log_stride;
+
+       return cmd->cmd_buf + slot_off;
+}
+
+/* XOR together the first @len bytes of @buf; returns 0 for len <= 0. */
+static u8 xor8_buf(void *buf, int len)
+{
+       u8 *p = buf;
+       u8 acc = 0;
+
+       while (len-- > 0)
+               acc ^= *p++;
+
+       return acc;
+}
+
+/*
+ * Check both checksums of a mailbox block: the one over the control fields
+ * starting at rsvd0, and the one over the whole block.  Each region must
+ * XOR to 0xff.  Returns 0 if both match, -EINVAL otherwise.
+ */
+static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+{
+       int ctrl_len = sizeof(*block) - sizeof(block->data) - 1;
+
+       if (xor8_buf(block->rsvd0, ctrl_len) != 0xff ||
+           xor8_buf(block, sizeof(*block)) != 0xff)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Stamp @token into a mailbox block and compute its two checksums.
+ *
+ * Order matters: the token feeds the control checksum, and the control
+ * checksum feeds the whole-block checksum.
+ */
+static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token)
+{
+       int ctrl_len = sizeof(*block) - sizeof(block->data) - 2;
+
+       block->token = token;
+       block->ctrl_sig = ~xor8_buf(block->rsvd0, ctrl_len);
+       block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+}
+
+/* Sign every mailbox block chained off @msg with @token. */
+static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token)
+{
+       struct mlx5_cmd_mailbox *box;
+
+       for (box = msg->next; box; box = box->next)
+               calc_block_sig(box->buf, token);
+}
+
+/*
+ * Compute the checksum of the command descriptor and of every mailbox
+ * block on both the input and the output chain of @ent.
+ */
+static void set_signature(struct mlx5_cmd_work_ent *ent)
+{
+       struct mlx5_cmd_layout *lay = ent->lay;
+
+       lay->sig = ~xor8_buf(lay, sizeof(*lay));
+       calc_chain_sig(ent->in, ent->token);
+       calc_chain_sig(ent->out, ent->token);
+}
+
+/*
+ * Poll the descriptor's ownership bit until CMD_OWNER_HW is cleared or the
+ * deadline (MLX5_CMD_TIMEOUT_MSEC plus one second) passes, sleeping 5-10 ms
+ * between checks.  Sets ent->ret to 0 on completion, -ETIMEDOUT otherwise.
+ */
+static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+{
+       unsigned long deadline = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+
+       for (;;) {
+               if (!(ent->lay->status_own & CMD_OWNER_HW)) {
+                       ent->ret = 0;
+                       return;
+               }
+
+               usleep_range(5000, 10000);
+               if (!time_before(jiffies, deadline))
+                       break;
+       }
+
+       ent->ret = -ETIMEDOUT;
+}
+
+/* Free a work entry obtained from alloc_cmd(). */
+static void free_cmd(struct mlx5_cmd_work_ent *ent)
+{
+       kfree(ent);
+}
+
+
+/*
+ * Verify the checksum of the command descriptor and of every mailbox block
+ * on the output chain of @ent.  Returns 0 if all match, -EINVAL otherwise.
+ */
+static int verify_signature(struct mlx5_cmd_work_ent *ent)
+{
+       struct mlx5_cmd_mailbox *box;
+       int err;
+
+       if (xor8_buf(ent->lay, sizeof(*ent->lay)) != 0xff)
+               return -EINVAL;
+
+       for (box = ent->out->next; box; box = box->next) {
+               err = verify_block_sig(box->buf);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Hex-dump @buf via pr_debug(), four big-endian 32-bit words per line, each
+ * line prefixed with a running offset starting at @offset.  @size is
+ * consumed in steps of 16 bytes.  A trailing blank line is printed unless
+ * @data_only is set.
+ */
+static void dump_buf(void *buf, int size, int data_only, int offset)
+{
+       __be32 *word = buf;
+       int done;
+
+       for (done = 0; done < size; done += 16, word += 4, offset += 16)
+               pr_debug("%03x: %08x %08x %08x %08x\n", offset,
+                        be32_to_cpu(word[0]), be32_to_cpu(word[1]),
+                        be32_to_cpu(word[2]), be32_to_cpu(word[3]));
+
+       if (data_only)
+               return;
+
+       pr_debug("\n");
+}
+
+/*
+ * Translate a command opcode into its name for log messages.
+ *
+ * Every known opcode maps to its MLX5_CMD_OP_ name without that prefix;
+ * anything else yields "unknown command opcode".  The returned string is a
+ * constant and must not be freed.
+ */
+const char *mlx5_command_str(int command)
+{
+       switch (command) {
+       case MLX5_CMD_OP_QUERY_HCA_CAP:
+               return "QUERY_HCA_CAP";
+       case MLX5_CMD_OP_SET_HCA_CAP:
+               return "SET_HCA_CAP";
+       case MLX5_CMD_OP_QUERY_ADAPTER:
+               return "QUERY_ADAPTER";
+       case MLX5_CMD_OP_INIT_HCA:
+               return "INIT_HCA";
+       case MLX5_CMD_OP_TEARDOWN_HCA:
+               return "TEARDOWN_HCA";
+       case MLX5_CMD_OP_QUERY_PAGES:
+               return "QUERY_PAGES";
+       case MLX5_CMD_OP_MANAGE_PAGES:
+               return "MANAGE_PAGES";
+       case MLX5_CMD_OP_CREATE_MKEY:
+               return "CREATE_MKEY";
+       case MLX5_CMD_OP_QUERY_MKEY:
+               return "QUERY_MKEY";
+       case MLX5_CMD_OP_DESTROY_MKEY:
+               return "DESTROY_MKEY";
+       case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+               return "QUERY_SPECIAL_CONTEXTS";
+       case MLX5_CMD_OP_CREATE_EQ:
+               return "CREATE_EQ";
+       case MLX5_CMD_OP_DESTROY_EQ:
+               return "DESTROY_EQ";
+       case MLX5_CMD_OP_QUERY_EQ:
+               return "QUERY_EQ";
+       case MLX5_CMD_OP_CREATE_CQ:
+               return "CREATE_CQ";
+       case MLX5_CMD_OP_DESTROY_CQ:
+               return "DESTROY_CQ";
+       case MLX5_CMD_OP_QUERY_CQ:
+               return "QUERY_CQ";
+       case MLX5_CMD_OP_MODIFY_CQ:
+               return "MODIFY_CQ";
+       case MLX5_CMD_OP_CREATE_QP:
+               return "CREATE_QP";
+       case MLX5_CMD_OP_DESTROY_QP:
+               return "DESTROY_QP";
+       case MLX5_CMD_OP_RST2INIT_QP:
+               return "RST2INIT_QP";
+       case MLX5_CMD_OP_INIT2RTR_QP:
+               return "INIT2RTR_QP";
+       case MLX5_CMD_OP_RTR2RTS_QP:
+               return "RTR2RTS_QP";
+       case MLX5_CMD_OP_RTS2RTS_QP:
+               return "RTS2RTS_QP";
+       case MLX5_CMD_OP_SQERR2RTS_QP:
+               return "SQERR2RTS_QP";
+       case MLX5_CMD_OP_2ERR_QP:
+               return "2ERR_QP";
+       case MLX5_CMD_OP_RTS2SQD_QP:
+               return "RTS2SQD_QP";
+       case MLX5_CMD_OP_SQD2RTS_QP:
+               return "SQD2RTS_QP";
+       case MLX5_CMD_OP_2RST_QP:
+               return "2RST_QP";
+       case MLX5_CMD_OP_QUERY_QP:
+               return "QUERY_QP";
+       case MLX5_CMD_OP_CONF_SQP:
+               return "CONF_SQP";
+       case MLX5_CMD_OP_MAD_IFC:
+               return "MAD_IFC";
+       case MLX5_CMD_OP_INIT2INIT_QP:
+               return "INIT2INIT_QP";
+       case MLX5_CMD_OP_SUSPEND_QP:
+               return "SUSPEND_QP";
+       case MLX5_CMD_OP_UNSUSPEND_QP:
+               return "UNSUSPEND_QP";
+       case MLX5_CMD_OP_SQD2SQD_QP:
+               return "SQD2SQD_QP";
+       case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET:
+               return "ALLOC_QP_COUNTER_SET";
+       case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET:
+               return "DEALLOC_QP_COUNTER_SET";
+       case MLX5_CMD_OP_QUERY_QP_COUNTER_SET:
+               return "QUERY_QP_COUNTER_SET";
+       case MLX5_CMD_OP_CREATE_PSV:
+               return "CREATE_PSV";
+       case MLX5_CMD_OP_DESTROY_PSV:
+               return "DESTROY_PSV";
+       case MLX5_CMD_OP_QUERY_PSV:
+               return "QUERY_PSV";
+       case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE:
+               return "QUERY_SIG_RULE_TABLE";
+       case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE:
+               return "QUERY_BLOCK_SIZE_TABLE";
+       case MLX5_CMD_OP_CREATE_SRQ:
+               return "CREATE_SRQ";
+       case MLX5_CMD_OP_DESTROY_SRQ:
+               return "DESTROY_SRQ";
+       case MLX5_CMD_OP_QUERY_SRQ:
+               return "QUERY_SRQ";
+       case MLX5_CMD_OP_ARM_RQ:
+               return "ARM_RQ";
+       case MLX5_CMD_OP_RESIZE_SRQ:
+               return "RESIZE_SRQ";
+       case MLX5_CMD_OP_ALLOC_PD:
+               return "ALLOC_PD";
+       case MLX5_CMD_OP_DEALLOC_PD:
+               return "DEALLOC_PD";
+       case MLX5_CMD_OP_ALLOC_UAR:
+               return "ALLOC_UAR";
+       case MLX5_CMD_OP_DEALLOC_UAR:
+               return "DEALLOC_UAR";
+       case MLX5_CMD_OP_ATTACH_TO_MCG:
+               return "ATTACH_TO_MCG";
+       case MLX5_CMD_OP_DETACH_FROM_MCG:
+               return "DETACH_FROM_MCG";
+       case MLX5_CMD_OP_ALLOC_XRCD:
+               return "ALLOC_XRCD";
+       case MLX5_CMD_OP_DEALLOC_XRCD:
+               return "DEALLOC_XRCD";
+       case MLX5_CMD_OP_ACCESS_REG:
+               /* was "MLX5_CMD_OP_ACCESS_REG"; use the bare name like the rest */
+               return "ACCESS_REG";
+       default:
+               return "unknown command opcode";
+       }
+}
+
+/*
+ * Dump a command to the debug log.
+ *
+ * @dev:   device used for the log prefix
+ * @ent:   work entry whose descriptor and mailbox chain are dumped
+ * @input: non-zero to dump the input message, zero for the output message
+ *
+ * If the MLX5_CMD_DATA bit is set in mlx5_core_debug_mask only payload
+ * bytes are printed (inline data followed by the data area of each mailbox
+ * block); otherwise the whole descriptor and whole mailbox blocks are
+ * printed.
+ */
+static void dump_command(struct mlx5_core_dev *dev,
+                        struct mlx5_cmd_work_ent *ent, int input)
+{
+       /* the opcode is always read from the inline input, even for output dumps */
+       u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode);
+       struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+       struct mlx5_cmd_mailbox *next = msg->next;
+       int data_only;
+       int offset = 0;
+       int dump_len;
+
+       data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
+
+       if (data_only)
+               mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
+                                  "dump command data %s(0x%x) %s\n",
+                                  mlx5_command_str(op), op,
+                                  input ? "INPUT" : "OUTPUT");
+       else
+               mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
+                             mlx5_command_str(op), op,
+                             input ? "INPUT" : "OUTPUT");
+
+       /* first the part held in the descriptor itself */
+       if (data_only) {
+               if (input) {
+                       dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+                       offset += sizeof(ent->lay->in);
+               } else {
+                       dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+                       offset += sizeof(ent->lay->out);
+               }
+       } else {
+               dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+               offset += sizeof(*ent->lay);
+       }
+
+       /*
+        * Then the mailbox chain.  offset is compared against the message
+        * length in both modes, although in full mode it advances by whole
+        * block sizes rather than by payload bytes.
+        */
+       while (next && offset < msg->len) {
+               if (data_only) {
+                       /* the last block may be only partially used */
+                       dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
+                       dump_buf(next->buf, dump_len, 1, offset);
+                       offset += MLX5_CMD_DATA_BLOCK_SIZE;
+               } else {
+                       mlx5_core_dbg(dev, "command block:\n");
+                       dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+                       offset += sizeof(struct mlx5_cmd_prot_block);
+               }
+               next = next->next;
+       }
+
+       if (data_only)
+               pr_debug("\n");
+}
+
+/*
+ * Post one command to the device.
+ *
+ * Runs either from the command workqueue or, for page queue commands,
+ * directly from mlx5_cmd_invoke().  It takes the matching semaphore, picks
+ * a descriptor slot, builds the descriptor from the work entry, rings the
+ * command doorbell and, in polling mode, waits for completion and calls the
+ * completion handler itself.
+ *
+ * The semaphore is not released here on the success path.
+ */
+static void cmd_work_handler(struct work_struct *work)
+{
+       struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+       struct mlx5_cmd *cmd = ent->cmd;
+       struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+       struct mlx5_cmd_layout *lay;
+       struct semaphore *sem;
+
+       sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+       down(sem);
+       if (!ent->page_queue) {
+               ent->idx = alloc_ent(cmd);
+               if (ent->idx < 0) {
+                       /*
+                        * NOTE(review): nothing on this path signals
+                        * ent->done or invokes the callback, so whoever
+                        * submitted the command is not told it was dropped.
+                        */
+                       mlx5_core_err(dev, "failed to allocate command entry\n");
+                       up(sem);
+                       return;
+               }
+       } else {
+               /* page queue commands use the slot right after the regular ones */
+               ent->idx = cmd->max_reg_cmds;
+       }
+
+       ent->token = alloc_token(cmd);
+       cmd->ent_arr[ent->idx] = ent;
+       lay = get_inst(cmd, ent->idx);
+       ent->lay = lay;
+       memset(lay, 0, sizeof(*lay));
+       /* the first bytes of the input travel inline in the descriptor */
+       memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+       if (ent->in->next)
+               lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+       lay->inlen = cpu_to_be32(ent->in->len);
+       if (ent->out->next)
+               lay->out_ptr = cpu_to_be64(ent->out->next->dma);
+       lay->outlen = cpu_to_be32(ent->out->len);
+       lay->type = MLX5_PCI_CMD_XPORT;
+       lay->token = ent->token;
+       lay->status_own = CMD_OWNER_HW;
+       if (!cmd->checksum_disabled)
+               set_signature(ent);
+       dump_command(dev, ent, 1);
+       /* start timestamp; mlx5_cmd_invoke() uses it for execution time stats */
+       ktime_get_ts(&ent->ts1);
+
+       /* ring doorbell after the descriptor is valid */
+       wmb();
+       iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
+       mlx5_core_dbg(dev, "write 0x%x to command doorbell\n", 1 << ent->idx);
+       mmiowb();
+       if (cmd->mode == CMD_MODE_POLLING) {
+               poll_timeout(ent);
+               /* make sure we read the descriptor after ownership is SW */
+               rmb();
+               mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+       }
+}
+
+/*
+ * Translate a command delivery status code into a description for log
+ * messages.  Unrecognized codes yield "unknown status code".
+ */
+static const char *deliv_status_to_str(u8 status)
+{
+       switch (status) {
+       case MLX5_CMD_DELIVERY_STAT_OK:
+               return "no errors";
+       case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+               return "signature error";
+       case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+               return "token error";
+       case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+               return "bad block number";
+       case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+               return "output pointer not aligned to block size";
+       case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+               return "input pointer not aligned to block size";
+       case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+               return "firmware internal error";
+       case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+               return "command input length error";
+       case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+               return "command output length error";
+       case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+               return "reserved fields not cleared";
+       case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+               return "bad command descriptor type";
+       default:
+               return "unknown status code";
+       }
+}
+
+/* Return the opcode, in CPU byte order, from the inline header of @in. */
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+       struct mlx5_inbox_hdr *hdr;
+
+       hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+       return be16_to_cpu(hdr->opcode);
+}
+
+/*
+ * Wait for a synchronous command to complete.
+ *
+ * In polling mode this waits without a timeout and returns ent->ret; in
+ * events mode it waits up to MLX5_CMD_TIMEOUT_MSEC and returns 0, or
+ * -ETIMEDOUT if the completion did not arrive.  A timeout is logged as a
+ * warning.
+ */
+static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+{
+       unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+       int err = 0;
+
+       if (dev->cmd.mode == CMD_MODE_POLLING) {
+               wait_for_completion(&ent->done);
+               err = ent->ret;
+       } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+               err = -ETIMEDOUT;
+       }
+
+       if (err == -ETIMEDOUT) {
+               u16 op = msg_to_opcode(ent->in);
+
+               mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+                              mlx5_command_str(op), op);
+       }
+
+       mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err,
+                     deliv_status_to_str(ent->status), ent->status);
+
+       return err;
+}
+
+/*
+ * Submit a command and, for synchronous callers, wait for it to finish.
+ *
+ * @dev:        device to send the command to
+ * @in:         input message
+ * @out:        output message
+ * @callback:   completion callback; NULL makes the call synchronous
+ * @context:    stored in the work entry alongside @callback
+ * @page_queue: non-zero to run the command on the page queue, in the
+ *              caller's context instead of on the workqueue
+ * @status:     receives the delivery status; written only when a
+ *              synchronous command did not time out
+ *
+ * Returns 0 or a negative errno.  -EINVAL is returned for a page queue
+ * command with a callback.
+ *
+ *  Notes:
+ *    1. Callback functions may not sleep
+ *    2. page queue commands do not support asynchronous completion
+ */
+static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+                          struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
+                          void *context, int page_queue, u8 *status)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_work_ent *ent;
+       ktime_t t1, t2, delta;
+       struct mlx5_cmd_stats *stats;
+       int err = 0;
+       s64 ds;
+       u16 op;
+
+       if (callback && page_queue)
+               return -EINVAL;
+
+       ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
+       if (IS_ERR(ent))
+               return PTR_ERR(ent);
+
+       if (!callback)
+               init_completion(&ent->done);
+
+       INIT_WORK(&ent->work, cmd_work_handler);
+       if (page_queue) {
+               /* page queue commands bypass the workqueue */
+               cmd_work_handler(&ent->work);
+       } else if (!queue_work(cmd->wq, &ent->work)) {
+               mlx5_core_warn(dev, "failed to queue work\n");
+               err = -ENOMEM;
+               goto out_free;
+       }
+
+       /*
+        * With a callback the entry is not freed here.
+        * NOTE(review): presumably the completion path owns it from this
+        * point -- confirm.
+        */
+       if (!callback) {
+               err = wait_func(dev, ent);
+               /* on timeout the entry is deliberately not freed (see the warning in wait_func) */
+               if (err == -ETIMEDOUT)
+                       goto out;
+
+               /* account the time between posting and completion per opcode */
+               t1 = timespec_to_ktime(ent->ts1);
+               t2 = timespec_to_ktime(ent->ts2);
+               delta = ktime_sub(t2, t1);
+               ds = ktime_to_ns(delta);
+               op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+               if (op < ARRAY_SIZE(cmd->stats)) {
+                       stats = &cmd->stats[op];
+                       spin_lock(&stats->lock);
+                       stats->sum += ds;
+                       ++stats->n;
+                       spin_unlock(&stats->lock);
+               }
+               mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+                                  "fw exec time for %s is %lld nsec\n",
+                                  mlx5_command_str(op), ds);
+               /*
+                * NOTE(review): in events mode wait_func() returns 0 without
+                * looking at ent->ret, so ent->ret is not propagated from
+                * here -- confirm this is intended.
+                */
+               *status = ent->status;
+               free_cmd(ent);
+       }
+
+       return err;
+
+out_free:
+       free_cmd(ent);
+out:
+       return err;
+}
+
+/*
+ * Write handler of the debugfs "run" file.
+ *
+ * Writing a string that starts with "go" executes the command previously
+ * staged in dbg->in_msg and places the result in dbg->out_msg.
+ *
+ * Returns @count on success, -ENOMEM if input or output has not been set
+ * up, -EFAULT if the user buffer cannot be read, -EINVAL for any other
+ * string, or the error returned by mlx5_cmd_exec().
+ */
+static ssize_t dbg_write(struct file *filp, const char __user *buf,
+                        size_t count, loff_t *pos)
+{
+       struct mlx5_core_dev *dev = filp->private_data;
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       char lbuf[3] = {0};
+       size_t len;
+       int err;
+
+       if (!dbg->in_msg || !dbg->out_msg)
+               return -ENOMEM;
+
+       /*
+        * Never read more than the caller supplied; the zero-initialized
+        * buffer keeps the string terminated.
+        */
+       len = min_t(size_t, count, sizeof(lbuf) - 1);
+       if (copy_from_user(lbuf, buf, len))
+               return -EFAULT;
+
+       if (strcmp(lbuf, "go"))
+               return -EINVAL;
+
+       err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
+
+       return err ? err : count;
+}
+
+
+/* File operations of the debugfs "run" file (write-only trigger). */
+static const struct file_operations fops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .write  = dbg_write,
+};
+
+/*
+ * Copy @size bytes from @from into command message @to.
+ *
+ * The inline area of the message is filled first; the remainder is spread
+ * over the mailbox chain, MLX5_CMD_DATA_BLOCK_SIZE bytes per block.
+ *
+ * Returns 0 on success, or -ENOMEM if either pointer is NULL or the chain
+ * is too short for @size.
+ */
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+{
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_mailbox *box;
+       int chunk;
+
+       if (!to || !from)
+               return -ENOMEM;
+
+       chunk = min_t(int, size, sizeof(to->first.data));
+       memcpy(to->first.data, from, chunk);
+       from += chunk;
+       size -= chunk;
+
+       for (box = to->next; size; box = box->next) {
+               if (!box) {
+                       /* chain is shorter than the payload: this is a BUG */
+                       return -ENOMEM;
+               }
+
+               block = box->buf;
+               chunk = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+               memcpy(block->data, from, chunk);
+               from += chunk;
+               size -= chunk;
+       }
+
+       return 0;
+}
+
+/*
+ * Copy @size bytes out of command message @from into @to.
+ *
+ * The inline area is copied first, then the data area of each mailbox
+ * block in chain order.  Every block's checksum is verified before its
+ * data is copied.
+ * NOTE(review): the checksum is checked here unconditionally, while the
+ * submit path only signs blocks when cmd->checksum_disabled is clear --
+ * confirm the two agree.
+ *
+ * Returns 0 on success, -ENOMEM if either pointer is NULL or the chain is
+ * too short, or -EINVAL on a checksum mismatch.
+ */
+static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
+{
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_mailbox *box;
+       int chunk;
+
+       if (!to || !from)
+               return -ENOMEM;
+
+       chunk = min_t(int, size, sizeof(from->first.data));
+       memcpy(to, from->first.data, chunk);
+       to += chunk;
+       size -= chunk;
+
+       for (box = from->next; size; box = box->next) {
+               if (!box) {
+                       /* chain is shorter than the payload: this is a BUG */
+                       return -ENOMEM;
+               }
+
+               block = box->buf;
+               chunk = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+               if (xor8_buf(block, sizeof(*block)) != 0xff)
+                       return -EINVAL;
+
+               memcpy(to, block->data, chunk);
+               to += chunk;
+               size -= chunk;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate one mailbox: the tracking structure plus a zeroed protocol
+ * block from the device's command pool.  Returns the mailbox with
+ * next == NULL, or ERR_PTR(-ENOMEM).
+ */
+static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
+                                             gfp_t flags)
+{
+       struct mlx5_cmd_mailbox *box;
+
+       box = kmalloc(sizeof(*box), flags);
+       if (!box)
+               return ERR_PTR(-ENOMEM);
+
+       box->buf = pci_pool_alloc(dev->cmd.pool, flags, &box->dma);
+       if (box->buf) {
+               memset(box->buf, 0, sizeof(struct mlx5_cmd_prot_block));
+               box->next = NULL;
+               return box;
+       }
+
+       mlx5_core_dbg(dev, "failed allocation\n");
+       kfree(box);
+
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Free a mailbox obtained from alloc_cmd_box(): return its block to the
+ * command pool, then free the tracking structure.
+ */
+static void free_cmd_box(struct mlx5_core_dev *dev,
+                        struct mlx5_cmd_mailbox *mailbox)
+{
+       pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+       kfree(mailbox);
+}
+
+/*
+ * Allocate a command message able to carry @size bytes.
+ *
+ * @dev:   device whose command pool supplies the mailbox blocks
+ * @flags: allocation flags, used for the message header and every mailbox
+ * @size:  payload size in bytes
+ *
+ * Whatever does not fit in the inline area of the message is carried by a
+ * chain of mailbox blocks.  Blocks are pushed on the front of the chain as
+ * they are allocated, so block numbers are assigned from n - 1 downwards
+ * and the head of the finished chain is block 0.
+ *
+ * Returns the message, or an ERR_PTR() on allocation failure.
+ */
+static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+                                              gfp_t flags, int size)
+{
+       struct mlx5_cmd_mailbox *tmp, *head = NULL;
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_msg *msg;
+       int blen;
+       int err;
+       int n;
+       int i;
+
+       /* honour the caller's context instead of assuming we may sleep */
+       msg = kzalloc(sizeof(*msg), flags);
+       if (!msg)
+               return ERR_PTR(-ENOMEM);
+
+       /* bytes left over after the inline area, rounded up to whole blocks */
+       blen = size - min_t(int, sizeof(msg->first.data), size);
+       n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE;
+
+       for (i = 0; i < n; i++) {
+               tmp = alloc_cmd_box(dev, flags);
+               if (IS_ERR(tmp)) {
+                       mlx5_core_warn(dev, "failed allocating block\n");
+                       err = PTR_ERR(tmp);
+                       goto err_alloc;
+               }
+
+               block = tmp->buf;
+               tmp->next = head;
+               block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+               block->block_num = cpu_to_be32(n - i - 1);
+               head = tmp;
+       }
+       msg->next = head;
+       msg->len = size;
+       return msg;
+
+err_alloc:
+       while (head) {
+               tmp = head->next;
+               free_cmd_box(dev, head);
+               head = tmp;
+       }
+       kfree(msg);
+
+       return ERR_PTR(err);
+}
+
+/* Free a command message together with every mailbox on its chain. */
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+                                 struct mlx5_cmd_msg *msg)
+{
+       struct mlx5_cmd_mailbox *box = msg->next;
+
+       while (box) {
+               struct mlx5_cmd_mailbox *following = box->next;
+
+               free_cmd_box(dev, box);
+               box = following;
+       }
+
+       kfree(msg);
+}
+
+/*
+ * Write handler shared by the debugfs "in" and "out" files: stage the raw
+ * bytes of a command to be executed later through the "run" file.
+ *
+ * Any previously staged input is discarded first.  Only writes at file
+ * offset 0 are accepted.
+ *
+ * Returns @count on success, -EINVAL for a non-zero offset, -ENOMEM on
+ * allocation failure or -EFAULT if the user buffer cannot be read.
+ */
+static ssize_t data_write(struct file *filp, const char __user *buf,
+                         size_t count, loff_t *pos)
+{
+       struct mlx5_core_dev *dev = filp->private_data;
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       void *ptr;
+
+       if (*pos != 0)
+               return -EINVAL;
+
+       kfree(dbg->in_msg);
+       dbg->in_msg = NULL;
+       dbg->inlen = 0;
+
+       ptr = kzalloc(count, GFP_KERNEL);
+       if (!ptr)
+               return -ENOMEM;
+
+       if (copy_from_user(ptr, buf, count)) {
+               kfree(ptr);
+               /* a faulting user pointer is -EFAULT, not a permission problem */
+               return -EFAULT;
+       }
+       dbg->in_msg = ptr;
+       dbg->inlen = count;
+
+       *pos = count;
+
+       return count;
+}
+
+/*
+ * Read handler shared by the debugfs "in" and "out" files: return the
+ * output of the last command run through the "run" file.
+ *
+ * Only a read at file offset 0 returns data; it copies at most
+ * dbg->outlen bytes.  Any later read returns 0.
+ *
+ * Returns the number of bytes copied, -ENOMEM if no output buffer has been
+ * set up, or -EFAULT if the user buffer cannot be written.
+ */
+static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
+                        loff_t *pos)
+{
+       struct mlx5_core_dev *dev = filp->private_data;
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       int copy;
+
+       if (*pos)
+               return 0;
+
+       if (!dbg->out_msg)
+               return -ENOMEM;
+
+       copy = min_t(int, count, dbg->outlen);
+       if (copy_to_user(buf, dbg->out_msg, copy))
+               return -EFAULT;
+
+       *pos += copy;
+
+       return copy;
+}
+
+/*
+ * File operations shared by the debugfs "in" and "out" files: writes stage
+ * command input, reads return command output.
+ */
+static const struct file_operations dfops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .write  = data_write,
+       .read   = data_read,
+};
+
+/*
+ * Read handler of the debugfs "out_len" file: report the configured output
+ * length as a decimal string.
+ *
+ * Only a read at file offset 0 returns data.  Returns the number of bytes
+ * copied, or -EFAULT if the user buffer cannot be written.
+ */
+static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
+                          loff_t *pos)
+{
+       struct mlx5_core_dev *dev = filp->private_data;
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       char outlen[8];
+       size_t len;
+       int err;
+
+       if (*pos)
+               return 0;
+
+       err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
+       if (err < 0)
+               return err;
+
+       /*
+        * snprintf() reports the untruncated length; copy no more than the
+        * local buffer holds and no more than the caller asked for.
+        */
+       len = min_t(size_t, err, sizeof(outlen) - 1);
+       len = min_t(size_t, len, count);
+
+       if (copy_to_user(buf, outlen, len))
+               return -EFAULT;
+
+       *pos += len;
+
+       return len;
+}
+
+/*
+ * Write handler of the debugfs "out_len" file: set the size of the output
+ * buffer and allocate it.
+ *
+ * The value is a decimal string of at most 6 characters, written at file
+ * offset 0.  Any previous output buffer is released first.
+ *
+ * Returns @count on success, -EINVAL for a bad offset, an over-long string
+ * or a value that is not a non-negative number, -EFAULT if the user buffer
+ * cannot be read, or -ENOMEM on allocation failure.
+ */
+static ssize_t outlen_write(struct file *filp, const char __user *buf,
+                           size_t count, loff_t *pos)
+{
+       struct mlx5_core_dev *dev = filp->private_data;
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       char outlen_str[8] = {0};
+       int outlen;
+       void *ptr;
+
+       if (*pos != 0 || count > 6)
+               return -EINVAL;
+
+       kfree(dbg->out_msg);
+       dbg->out_msg = NULL;
+       dbg->outlen = 0;
+
+       /* the zero-initialized buffer keeps the string terminated after count bytes */
+       if (copy_from_user(outlen_str, buf, count))
+               return -EFAULT;
+
+       /*
+        * sscanf() returns the number of converted fields, not an errno:
+        * anything but exactly one conversion leaves outlen undefined.
+        */
+       if (sscanf(outlen_str, "%d", &outlen) != 1 || outlen < 0)
+               return -EINVAL;
+
+       ptr = kzalloc(outlen, GFP_KERNEL);
+       if (!ptr)
+               return -ENOMEM;
+
+       dbg->out_msg = ptr;
+       dbg->outlen = outlen;
+
+       *pos = count;
+
+       return count;
+}
+
+/* File operations of the debugfs "out_len" file. */
+static const struct file_operations olfops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .write  = outlen_write,
+       .read   = outlen_read,
+};
+
+/* Build the command workqueue name, "mlx5_cmd_<device name>". */
+static void set_wqname(struct mlx5_core_dev *dev)
+{
+       const char *name = dev_name(&dev->pdev->dev);
+
+       snprintf(dev->cmd.wq_name, sizeof(dev->cmd.wq_name), "mlx5_cmd_%s",
+                name);
+}
+
+/*
+ * Remove the command interface debugfs entries.  Does nothing when the
+ * driver has no debugfs root.
+ */
+static void clean_debug_files(struct mlx5_core_dev *dev)
+{
+       if (mlx5_debugfs_root) {
+               mlx5_cmdif_debugfs_cleanup(dev);
+               debugfs_remove_recursive(dev->cmd.dbg.dbg_root);
+       }
+}
+
+/*
+ * Create the "cmd" debugfs directory and its files:
+ *
+ *   in      - write the raw input of a command
+ *   out     - read the raw output of the last command
+ *   out_len - size of the output buffer
+ *   status  - a u8 status value
+ *   run     - write "go" to execute the staged command
+ *
+ * Returns 0 on success, or when the driver has no debugfs root, and
+ * -ENOMEM if any entry cannot be created.
+ */
+static int create_debugfs_files(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+       int err = -ENOMEM;
+
+       if (!mlx5_debugfs_root)
+               return 0;
+
+       dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+       if (!dbg->dbg_root)
+               return err;
+
+       /* "in" is written by the user, so it must be writable, not readable */
+       dbg->dbg_in = debugfs_create_file("in", 0200, dbg->dbg_root,
+                                         dev, &dfops);
+       if (!dbg->dbg_in)
+               goto err_dbg;
+
+       /* "out" is read by the user, so it must be readable, not writable */
+       dbg->dbg_out = debugfs_create_file("out", 0400, dbg->dbg_root,
+                                          dev, &dfops);
+       if (!dbg->dbg_out)
+               goto err_dbg;
+
+       dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
+                                             dev, &olfops);
+       if (!dbg->dbg_outlen)
+               goto err_dbg;
+
+       dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
+                                           &dbg->status);
+       if (!dbg->dbg_status)
+               goto err_dbg;
+
+       dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
+       if (!dbg->dbg_run)
+               goto err_dbg;
+
+       mlx5_cmdif_debugfs_init(dev);
+
+       return 0;
+
+err_dbg:
+       clean_debug_files(dev);
+       return err;
+}
+
+/*
+ * Switch the command interface to event (interrupt) completion mode.
+ *
+ * All regular command slots and the pages slot are acquired first so
+ * that no new command can be issued, then the command workqueue is
+ * flushed before the mode is changed.  The slots are released again
+ * afterwards.
+ */
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       int slot;
+
+       slot = 0;
+       while (slot < cmd->max_reg_cmds) {
+               down(&cmd->sem);
+               slot++;
+       }
+       down(&cmd->pages_sem);
+
+       flush_workqueue(cmd->wq);
+       cmd->mode = CMD_MODE_EVENTS;
+
+       up(&cmd->pages_sem);
+       slot = 0;
+       while (slot < cmd->max_reg_cmds) {
+               up(&cmd->sem);
+               slot++;
+       }
+}
+
+/*
+ * Switch the command interface to polling completion mode.
+ *
+ * All regular command slots and the pages slot are acquired first so
+ * that no new command can be issued, then the command workqueue is
+ * flushed before the mode is changed.  The slots are released again
+ * afterwards.
+ */
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       int slot;
+
+       slot = 0;
+       while (slot < cmd->max_reg_cmds) {
+               down(&cmd->sem);
+               slot++;
+       }
+       down(&cmd->pages_sem);
+
+       flush_workqueue(cmd->wq);
+       cmd->mode = CMD_MODE_POLLING;
+
+       up(&cmd->pages_sem);
+       slot = 0;
+       while (slot < cmd->max_reg_cmds) {
+               up(&cmd->sem);
+               slot++;
+       }
+}
+
+/*
+ * Process completed commands.
+ *
+ * @dev:    core device whose command interface completed work
+ * @vector: bitmap of completed command slots; bit i set means the entry
+ *          in cmd->ent_arr[i] has completed
+ *
+ * For every completed entry the completion timestamp is taken, the
+ * inline output is copied from the command layout, the signature is
+ * verified (unless checksums are disabled) and the delivery status is
+ * recorded.  The slot is then released and either the asynchronous
+ * callback is invoked or the synchronous waiter is woken up.  Finally the
+ * semaphore matching the entry's queue (pages or regular) is released.
+ */
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_work_ent *ent;
+       mlx5_cmd_cbk_t callback;
+       struct semaphore *sem;
+       void *context;
+       int err;
+       int i;
+
+       for (i = 0; i < (1 << cmd->log_sz); i++) {
+               if (!test_bit(i, &vector))
+                       continue;
+
+               ent = cmd->ent_arr[i];
+
+               /*
+                * Pick the semaphore now: once free_cmd() or complete() has
+                * run below, the entry may already have been released (by
+                * us or by the woken waiter) and must not be dereferenced.
+                */
+               sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+
+               ktime_get_ts(&ent->ts2);
+               memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+               dump_command(dev, ent, 0);
+               if (!ent->ret) {
+                       if (!cmd->checksum_disabled)
+                               ent->ret = verify_signature(ent);
+                       else
+                               ent->ret = 0;
+                       ent->status = ent->lay->status_own >> 1;
+                       mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+                                     ent->ret, deliv_status_to_str(ent->status), ent->status);
+               }
+               free_ent(cmd, ent->idx);
+               if (ent->callback) {
+                       /* Snapshot what the callback needs before freeing. */
+                       callback = ent->callback;
+                       context = ent->context;
+                       err = ent->ret;
+                       free_cmd(ent);
+                       callback(err, context);
+               } else {
+                       complete(&ent->done);
+               }
+               up(sem);
+       }
+}
+EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+static int status_to_err(u8 status)
+{
+       return status ? -1 : 0; /* TBD more meaningful codes */
+}
+
+/*
+ * Obtain an input message able to hold in_size bytes.
+ *
+ * Sizes in (16, MED_LIST_SIZE] are served from the medium cache and sizes
+ * in (MED_LIST_SIZE, LONG_LIST_SIZE] from the large cache when a cached
+ * message is available; in every other case a fresh message is allocated
+ * with mlx5_alloc_cmd_msg().
+ *
+ * Returns the message, or whatever mlx5_alloc_cmd_msg() returns on
+ * failure (callers test it with IS_ERR()).
+ */
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_msg *msg = NULL;
+       struct cache_ent *pool = NULL;
+
+       if (in_size > 16 && in_size <= MED_LIST_SIZE)
+               pool = &cmd->cache.med;
+       else if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
+               pool = &cmd->cache.large;
+
+       if (pool) {
+               spin_lock(&pool->lock);
+               if (!list_empty(&pool->head)) {
+                       msg = list_entry(pool->head.next, typeof(*msg), list);
+                       /* A cached message is larger than needed; record
+                        * the size actually requested.
+                        */
+                       msg->len = in_size;
+                       list_del(&msg->list);
+               }
+               spin_unlock(&pool->lock);
+       }
+
+       if (msg)
+               return msg;
+
+       return mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
+}
+
+/*
+ * Release a message obtained from alloc_msg(): messages that belong to a
+ * cache are put back at the tail of that cache's list, all others are
+ * freed.
+ */
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+       struct cache_ent *pool = msg->cache;
+
+       if (!pool) {
+               mlx5_free_cmd_msg(dev, msg);
+               return;
+       }
+
+       spin_lock(&pool->lock);
+       list_add_tail(&msg->list, &pool->head);
+       spin_unlock(&pool->lock);
+}
+
+/* Return non-zero when the inbox header carries the MANAGE_PAGES opcode. */
+static int is_manage_pages(struct mlx5_inbox_hdr *in)
+{
+       u16 opcode = be16_to_cpu(in->opcode);
+
+       return opcode == MLX5_CMD_OP_MANAGE_PAGES;
+}
+
+/*
+ * Execute a firmware command synchronously.
+ *
+ * @dev:      core device
+ * @in:       caller's input mailbox; it starts with a struct
+ *            mlx5_inbox_hdr whose opcode selects the command
+ * @in_size:  size of @in in bytes
+ * @out:      caller's buffer receiving the command output
+ * @out_size: size of @out in bytes
+ *
+ * The input is copied into a command message (possibly taken from the
+ * message cache), an output message is allocated, the command is invoked
+ * without a completion callback, and on success the output is copied
+ * back to @out.
+ *
+ * Returns 0 on success, a negative value when allocation, copying or
+ * invocation fails, or the result of status_to_err() when the command
+ * was delivered with a non-zero delivery status.
+ */
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+                 int out_size)
+{
+       struct mlx5_cmd_msg *inb;
+       struct mlx5_cmd_msg *outb;
+       int pages_queue;
+       int err;
+       u8 status = 0;
+
+       /* MANAGE_PAGES commands are flagged so they use the pages queue. */
+       pages_queue = is_manage_pages(in);
+
+       inb = alloc_msg(dev, in_size);
+       if (IS_ERR(inb)) {
+               err = PTR_ERR(inb);
+               return err;
+       }
+
+       err = mlx5_copy_to_msg(inb, in, in_size);
+       if (err) {
+               mlx5_core_warn(dev, "err %d\n", err);
+               goto out_in;
+       }
+
+       outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
+       if (IS_ERR(outb)) {
+               err = PTR_ERR(outb);
+               goto out_in;
+       }
+
+       err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
+       if (err)
+               goto out_out;
+
+       /* err is always 0 at this point; only status is informative. */
+       mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
+       if (status) {
+               err = status_to_err(status);
+               goto out_out;
+       }
+
+       err = mlx5_copy_from_msg(out, outb, out_size);
+
+out_out:
+       /* The output message never comes from the cache; free it directly. */
+       mlx5_free_cmd_msg(dev, outb);
+
+out_in:
+       /* Returns the input message to its cache or frees it. */
+       free_msg(dev, inb);
+       return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_exec);
+
+/* Free every message held in the large and medium message caches. */
+static void destroy_msg_cache(struct mlx5_core_dev *dev)
+{
+       struct list_head *large = &dev->cmd.cache.large.head;
+       struct list_head *med = &dev->cmd.cache.med.head;
+       struct mlx5_cmd_msg *msg;
+
+       while (!list_empty(large)) {
+               msg = list_entry(large->next, typeof(*msg), list);
+               list_del(&msg->list);
+               mlx5_free_cmd_msg(dev, msg);
+       }
+
+       while (!list_empty(med)) {
+               msg = list_entry(med->next, typeof(*msg), list);
+               list_del(&msg->list);
+               mlx5_free_cmd_msg(dev, msg);
+       }
+}
+
+/*
+ * Initialise the two message caches and pre-allocate their contents:
+ * NUM_LONG_LISTS messages of LONG_LIST_SIZE bytes for the large cache and
+ * NUM_MED_LISTS messages of MED_LIST_SIZE bytes for the medium cache.
+ *
+ * Returns 0 on success.  If any allocation fails, all messages cached so
+ * far are freed and the allocation error is returned.
+ */
+static int create_msg_cache(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_msg *msg;
+       int err;
+       int i;
+
+       spin_lock_init(&cmd->cache.large.lock);
+       INIT_LIST_HEAD(&cmd->cache.large.head);
+       spin_lock_init(&cmd->cache.med.lock);
+       INIT_LIST_HEAD(&cmd->cache.med.head);
+
+       for (i = 0; i < NUM_LONG_LISTS; i++) {
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+               if (IS_ERR(msg)) {
+                       err = PTR_ERR(msg);
+                       goto ex_err;
+               }
+               /* Tag the message so free_msg() returns it to this cache. */
+               msg->cache = &cmd->cache.large;
+               list_add_tail(&msg->list, &cmd->cache.large.head);
+       }
+
+       for (i = 0; i < NUM_MED_LISTS; i++) {
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+               if (IS_ERR(msg)) {
+                       err = PTR_ERR(msg);
+                       goto ex_err;
+               }
+               /* Tag the message so free_msg() returns it to this cache. */
+               msg->cache = &cmd->cache.med;
+               list_add_tail(&msg->list, &cmd->cache.med.head);
+       }
+
+       return 0;
+
+ex_err:
+       destroy_msg_cache(dev);
+       return err;
+}
+
+/*
+ * Initialise the command interface of @dev.
+ *
+ * Verifies that the firmware's command interface revision matches the
+ * driver's, allocates and DMA-maps the one-page command queue, reads the
+ * queue geometry (log size / log stride) from the initialisation segment,
+ * programs the queue address into the device, and creates the message
+ * cache, the command workqueue and the debugfs entries.  The interface
+ * starts out in polling mode.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that
+ * was set up is released again.
+ */
+int mlx5_cmd_init(struct mlx5_core_dev *dev)
+{
+       int size = sizeof(struct mlx5_cmd_prot_block);
+       int align = roundup_pow_of_two(size);
+       struct mlx5_cmd *cmd = &dev->cmd;
+       u32 cmd_h, cmd_l;
+       u16 cmd_if_rev;
+       int err;
+       int i;
+
+       cmd_if_rev = cmdif_rev(dev);
+       if (cmd_if_rev != CMD_IF_REV) {
+               dev_err(&dev->pdev->dev,
+                       "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+                       CMD_IF_REV, cmd_if_rev);
+               return -EINVAL;
+       }
+
+       cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0);
+       if (!cmd->pool)
+               return -ENOMEM;
+
+       /*
+        * This function already performs GFP_KERNEL allocations (see
+        * create_msg_cache()), so there is no reason to dip into the
+        * atomic reserves for the command queue page.
+        */
+       cmd->cmd_buf = (void *)__get_free_pages(GFP_KERNEL, 0);
+       if (!cmd->cmd_buf) {
+               err = -ENOMEM;
+               goto err_free_pool;
+       }
+       cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE,
+                                 DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       /* Low byte of the register: log size in bits 7:4, log stride in 3:0. */
+       cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+       cmd->log_sz = cmd_l >> 4 & 0xf;
+       cmd->log_stride = cmd_l & 0xf;
+       if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
+               dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
+                       1 << cmd->log_sz);
+               err = -EINVAL;
+               goto err_map;
+       }
+
+       /* The whole queue must fit in the single page mapped above. */
+       if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) {
+               dev_err(&dev->pdev->dev, "command queue size overflow\n");
+               err = -EINVAL;
+               goto err_map;
+       }
+
+       /* One slot is kept out of the regular pool. */
+       cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
+       /*
+        * Shift an unsigned long: max_reg_cmds can be as large as 31, and
+        * shifting a signed int 1 by 31 is undefined behaviour.
+        */
+       cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
+
+       cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+       if (cmd->cmdif_rev > CMD_IF_REV) {
+               dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
+                       CMD_IF_REV, cmd->cmdif_rev);
+               err = -ENOTSUPP;
+               goto err_map;
+       }
+
+       spin_lock_init(&cmd->alloc_lock);
+       spin_lock_init(&cmd->token_lock);
+       for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+               spin_lock_init(&cmd->stats[i].lock);
+
+       sema_init(&cmd->sem, cmd->max_reg_cmds);
+       sema_init(&cmd->pages_sem, 1);
+
+       cmd_h = (u32)((u64)(cmd->dma) >> 32);
+       cmd_l = (u32)(cmd->dma);
+       /* The low 12 bits of the address register are not address bits. */
+       if (cmd_l & 0xfff) {
+               dev_err(&dev->pdev->dev, "invalid command queue address\n");
+               err = -ENOMEM;
+               goto err_map;
+       }
+
+       iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+       iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+       /* Make sure firmware sees the complete address before we proceed */
+       wmb();
+
+       mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+       cmd->mode = CMD_MODE_POLLING;
+
+       err = create_msg_cache(dev);
+       if (err) {
+               dev_err(&dev->pdev->dev, "failed to create command cache\n");
+               goto err_map;
+       }
+
+       set_wqname(dev);
+       cmd->wq = create_singlethread_workqueue(cmd->wq_name);
+       if (!cmd->wq) {
+               dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+               err = -ENOMEM;
+               goto err_cache;
+       }
+
+       err = create_debugfs_files(dev);
+       if (err) {
+               err = -ENOMEM;
+               goto err_wq;
+       }
+
+       return 0;
+
+err_wq:
+       destroy_workqueue(cmd->wq);
+
+err_cache:
+       destroy_msg_cache(dev);
+
+err_map:
+       dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
+                        DMA_BIDIRECTIONAL);
+err_free:
+       free_pages((unsigned long)cmd->cmd_buf, 0);
+
+err_free_pool:
+       pci_pool_destroy(cmd->pool);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_init);
+
+/*
+ * Tear down the command interface of @dev, releasing the resources set up
+ * by mlx5_cmd_init() in reverse order: debugfs entries, workqueue,
+ * message cache, the DMA mapping and page of the command queue, and
+ * finally the PCI pool.
+ */
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+
+       clean_debug_files(dev);
+       destroy_workqueue(cmd->wq);
+       destroy_msg_cache(dev);
+       dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
+                        DMA_BIDIRECTIONAL);
+       free_pages((unsigned long)cmd->cmd_buf, 0);
+       pci_pool_destroy(cmd->pool);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup);
+
+/*
+ * Translate a command status code into a human readable string.
+ * Unrecognised codes yield "unknown status".
+ */
+static const char *cmd_status_str(u8 status)
+{
+       const char *str;
+
+       switch (status) {
+       case MLX5_CMD_STAT_OK:
+               str = "OK";
+               break;
+       case MLX5_CMD_STAT_INT_ERR:
+               str = "internal error";
+               break;
+       case MLX5_CMD_STAT_BAD_OP_ERR:
+               str = "bad operation";
+               break;
+       case MLX5_CMD_STAT_BAD_PARAM_ERR:
+               str = "bad parameter";
+               break;
+       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+               str = "bad system state";
+               break;
+       case MLX5_CMD_STAT_BAD_RES_ERR:
+               str = "bad resource";
+               break;
+       case MLX5_CMD_STAT_RES_BUSY:
+               str = "resource busy";
+               break;
+       case MLX5_CMD_STAT_LIM_ERR:
+               str = "limits exceeded";
+               break;
+       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+               str = "bad resource state";
+               break;
+       case MLX5_CMD_STAT_IX_ERR:
+               str = "bad index";
+               break;
+       case MLX5_CMD_STAT_NO_RES_ERR:
+               str = "no resources";
+               break;
+       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+               str = "bad input length";
+               break;
+       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+               str = "bad output length";
+               break;
+       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+               str = "bad QP state";
+               break;
+       case MLX5_CMD_STAT_BAD_PKT_ERR:
+               str = "bad packet (discarded)";
+               break;
+       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+               str = "bad size too many outstanding CQEs";
+               break;
+       default:
+               str = "unknown status";
+               break;
+       }
+
+       return str;
+}
+
+/*
+ * Convert the status of a command outbox header into an errno value.
+ *
+ * Returns 0 when the status is zero.  Otherwise a warning with the status
+ * string, status code and syndrome is logged and a negative errno
+ * matching the status is returned (-EIO for unrecognised codes).
+ */
+int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
+{
+       if (!hdr->status)
+               return 0;
+
+       pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
+               cmd_status_str(hdr->status), hdr->status,
+               be32_to_cpu(hdr->syndrome));
+
+       switch (hdr->status) {
+       case MLX5_CMD_STAT_OK:
+               return 0;
+
+       case MLX5_CMD_STAT_RES_BUSY:
+               return -EBUSY;
+
+       case MLX5_CMD_STAT_NO_RES_ERR:
+               return -EAGAIN;
+
+       case MLX5_CMD_STAT_BAD_OP_ERR:
+       case MLX5_CMD_STAT_BAD_PARAM_ERR:
+       case MLX5_CMD_STAT_BAD_RES_ERR:
+       case MLX5_CMD_STAT_LIM_ERR:
+       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+       case MLX5_CMD_STAT_IX_ERR:
+       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+       case MLX5_CMD_STAT_BAD_PKT_ERR:
+       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+               return -EINVAL;
+
+       case MLX5_CMD_STAT_INT_ERR:
+       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+       default:
+               return -EIO;
+       }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644 (file)
index 0000000..c2d660b
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/cq.h>
+#include "mlx5_core.h"
+
+/*
+ * Dispatch a completion event to the CQ numbered @cqn.
+ *
+ * The CQ is looked up under the table lock and pinned with a reference
+ * for the duration of the call to its comp() handler; its arm sequence
+ * number is incremented before the handler runs.  Dropping the last
+ * reference signals cq->free.  An unknown CQ number only logs a warning.
+ */
+void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
+{
+       struct mlx5_cq_table *table = &dev->priv.cq_table;
+       struct mlx5_core_cq *cq;
+
+       spin_lock(&table->lock);
+       cq = radix_tree_lookup(&table->tree, cqn);
+       if (likely(cq))
+               atomic_inc(&cq->refcount);
+       spin_unlock(&table->lock);
+
+       if (cq) {
+               ++cq->arm_sn;
+               cq->comp(cq);
+
+               if (atomic_dec_and_test(&cq->refcount))
+                       complete(&cq->free);
+               return;
+       }
+
+       mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
+}
+
+/*
+ * Dispatch an asynchronous event of type @event_type to the CQ numbered
+ * @cqn.
+ *
+ * The CQ is looked up under the table lock and pinned with a reference
+ * while its event() handler runs.  Dropping the last reference signals
+ * cq->free.  An unknown CQ number only logs a warning.
+ */
+void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
+{
+       struct mlx5_cq_table *table = &dev->priv.cq_table;
+       struct mlx5_core_cq *cq;
+
+       spin_lock(&table->lock);
+       cq = radix_tree_lookup(&table->tree, cqn);
+       if (cq)
+               atomic_inc(&cq->refcount);
+       spin_unlock(&table->lock);
+
+       if (cq) {
+               cq->event(cq, event_type);
+
+               if (atomic_dec_and_test(&cq->refcount))
+                       complete(&cq->free);
+               return;
+       }
+
+       mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
+}
+
+
+/*
+ * Create a completion queue in firmware and register it with the driver.
+ *
+ * @dev:   core device
+ * @cq:    CQ object to initialise; on success its cqn, consumer index,
+ *         arm sequence number, refcount, completion and pid are set
+ * @in:    CREATE_CQ input mailbox prepared by the caller (the opcode is
+ *         filled in here)
+ * @inlen: size of @in in bytes
+ *
+ * Returns 0 on success, or a negative errno when the command fails or the
+ * CQ cannot be inserted into the lookup table.  In the latter case the CQ
+ * that firmware just created is destroyed again.  Failure to add the CQ
+ * to debugfs is only logged.
+ */
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                       struct mlx5_create_cq_mbox_in *in, int inlen)
+{
+       int err;
+       struct mlx5_cq_table *table = &dev->priv.cq_table;
+       struct mlx5_create_cq_mbox_out out;
+       struct mlx5_destroy_cq_mbox_in din;
+       struct mlx5_destroy_cq_mbox_out dout;
+
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
+       memset(&out, 0, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       if (err)
+               return err;
+
+       if (out.hdr.status)
+               return mlx5_cmd_status_to_err(&out.hdr);
+
+       /* Only the low 24 bits of the returned field hold the CQ number. */
+       cq->cqn = be32_to_cpu(out.cqn) & 0xffffff;
+       cq->cons_index = 0;
+       cq->arm_sn     = 0;
+       atomic_set(&cq->refcount, 1);
+       init_completion(&cq->free);
+
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree, cq->cqn, cq);
+       spin_unlock_irq(&table->lock);
+       if (err)
+               goto err_cmd;
+
+       cq->pid = current->pid;
+       err = mlx5_debug_cq_add(dev, cq);
+       if (err)
+               mlx5_core_dbg(dev, "failed adding CQ 0x%x to debug file system\n",
+                             cq->cqn);
+
+       return 0;
+
+err_cmd:
+       memset(&din, 0, sizeof(din));
+       memset(&dout, 0, sizeof(dout));
+       din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
+       /* Name the CQ to destroy; a zeroed mailbox would target CQ 0. */
+       din.cqn = cpu_to_be32(cq->cqn);
+       mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_cq);
+
+/*
+ * Destroy a completion queue.
+ *
+ * The CQ is first removed from the lookup table so that no new event can
+ * find it, then destroyed in firmware.  After synchronising with the CQ's
+ * interrupt, the debugfs entry is removed, the initial reference is
+ * dropped and the function waits until all remaining references (held by
+ * in-flight event handlers) are gone.
+ *
+ * Returns 0 on success, -EINVAL when the CQ is not (or not consistently)
+ * present in the table, or a negative errno when the firmware command
+ * fails.
+ */
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+       struct mlx5_cq_table *table = &dev->priv.cq_table;
+       struct mlx5_destroy_cq_mbox_in in;
+       struct mlx5_destroy_cq_mbox_out out;
+       struct mlx5_core_cq *tmp;
+       int err;
+
+       spin_lock_irq(&table->lock);
+       tmp = radix_tree_delete(&table->tree, cq->cqn);
+       spin_unlock_irq(&table->lock);
+       if (!tmp) {
+               mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
+               return -EINVAL;
+       }
+       if (tmp != cq) {
+               mlx5_core_warn(dev, "corruption on cqn 0x%x\n", cq->cqn);
+               return -EINVAL;
+       }
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
+       in.cqn = cpu_to_be32(cq->cqn);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err)
+               return err;
+
+       if (out.hdr.status)
+               return mlx5_cmd_status_to_err(&out.hdr);
+
+       /* Let any handler currently running on the CQ's IRQ finish. */
+       synchronize_irq(cq->irqn);
+
+       mlx5_debug_cq_remove(dev, cq);
+       /* Drop the creation reference, then wait for the last user. */
+       if (atomic_dec_and_test(&cq->refcount))
+               complete(&cq->free);
+       wait_for_completion(&cq->free);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+/*
+ * Query firmware for the state of @cq.
+ *
+ * @out is cleared and then filled with the QUERY_CQ output mailbox.
+ * Returns 0 on success or a negative errno when the command cannot be
+ * executed or completes with a non-zero status.
+ */
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                      struct mlx5_query_cq_mbox_out *out)
+{
+       struct mlx5_query_cq_mbox_in in;
+       int ret;
+
+       memset(&in, 0, sizeof(in));
+       memset(out, 0, sizeof(*out));
+
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ);
+       in.cqn = cpu_to_be32(cq->cqn);
+
+       ret = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+       if (ret)
+               return ret;
+
+       if (!out->hdr.status)
+               return 0;
+
+       return mlx5_cmd_status_to_err(&out->hdr);
+}
+EXPORT_SYMBOL(mlx5_core_query_cq);
+
+
+/* Modifying a CQ is not implemented: always returns -ENOSYS. */
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                       int type, struct mlx5_cq_modify_params *params)
+{
+       return -ENOSYS;
+}
+
+/*
+ * Initialise the CQ lookup table (lock and radix tree) of @dev and create
+ * the CQ debugfs directory.  Returns the result of mlx5_cq_debugfs_init().
+ */
+int mlx5_init_cq_table(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cq_table *cq_table = &dev->priv.cq_table;
+
+       spin_lock_init(&cq_table->lock);
+       INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
+       return mlx5_cq_debugfs_init(dev);
+}
+
+/* Undo mlx5_init_cq_table(): only the CQ debugfs directory needs removal. */
+void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
+{
+       mlx5_cq_debugfs_cleanup(dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644 (file)
index 0000000..5e9cf2b
--- /dev/null
@@ -0,0 +1,587 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+/* Indices of the QP attributes; each one selects a case in qp_read_field(). */
+enum {
+       QP_PID,
+       QP_STATE,
+       QP_XPORT,
+       QP_MTU,
+       QP_N_RECV,
+       QP_RECV_SZ,
+       QP_N_SEND,
+       QP_LOG_PG_SZ,
+       QP_RQPN,
+};
+
+/* Name of each QP attribute, indexed by the enum above. */
+static char *qp_fields[] = {
+       [QP_PID]        = "pid",
+       [QP_STATE]      = "state",
+       [QP_XPORT]      = "transport",
+       [QP_MTU]        = "mtu",
+       [QP_N_RECV]     = "num_recv",
+       [QP_RECV_SZ]    = "rcv_wqe_sz",
+       [QP_N_SEND]     = "num_send",
+       [QP_LOG_PG_SZ]  = "log2_page_sz",
+       [QP_RQPN]       = "remote_qpn",
+};
+
+/* Indices of the EQ attributes; each one selects a case in eq_read_field(). */
+enum {
+       EQ_NUM_EQES,
+       EQ_INTR,
+       EQ_LOG_PG_SZ,
+};
+
+/* Name of each EQ attribute, indexed by the enum above. */
+static char *eq_fields[] = {
+       [EQ_NUM_EQES]   = "num_eqes",
+       [EQ_INTR]       = "intr",
+       [EQ_LOG_PG_SZ]  = "log_page_size",
+};
+
+/* Indices of the CQ attributes; each one selects a case in cq_read_field(). */
+enum {
+       CQ_PID,
+       CQ_NUM_CQES,
+       CQ_LOG_PG_SZ,
+};
+
+/* Name of each CQ attribute, indexed by the enum above. */
+static char *cq_fields[] = {
+       [CQ_PID]        = "pid",
+       [CQ_NUM_CQES]   = "num_cqes",
+       [CQ_LOG_PG_SZ]  = "log_page_size",
+};
+
+/*
+ * Top-level "mlx5" debugfs directory.  NULL when the directory could not
+ * be created, in which case the debugfs helpers in this file skip their
+ * work.
+ */
+struct dentry *mlx5_debugfs_root;
+EXPORT_SYMBOL(mlx5_debugfs_root);
+
+/*
+ * Create the top-level "mlx5" debugfs directory.  Any failure (NULL or
+ * error pointer) leaves mlx5_debugfs_root set to NULL.
+ */
+void mlx5_register_debugfs(void)
+{
+       struct dentry *root = debugfs_create_dir("mlx5", NULL);
+
+       mlx5_debugfs_root = IS_ERR_OR_NULL(root) ? NULL : root;
+}
+
+/* Remove the top-level "mlx5" debugfs directory. */
+void mlx5_unregister_debugfs(void)
+{
+       debugfs_remove(mlx5_debugfs_root);
+}
+
+/*
+ * Reset the device's QP counter and create the "QPs" debugfs directory
+ * under the device's debugfs root.  Returns 0 on success or when debugfs
+ * is unavailable, -ENOMEM when the directory cannot be created.
+ */
+int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+       if (mlx5_debugfs_root) {
+               atomic_set(&dev->num_qps, 0);
+
+               dev->priv.qp_debugfs = debugfs_create_dir("QPs",  dev->priv.dbg_root);
+               if (!dev->priv.qp_debugfs)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/* Remove the "QPs" debugfs directory and everything beneath it. */
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+       /* Nothing was created if the top-level directory does not exist. */
+       if (!mlx5_debugfs_root)
+               return;
+
+       debugfs_remove_recursive(dev->priv.qp_debugfs);
+}
+
+/*
+ * Create the "EQs" debugfs directory under the device's debugfs root.
+ * Returns 0 on success or when debugfs is unavailable, -ENOMEM when the
+ * directory cannot be created.
+ */
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+{
+       if (mlx5_debugfs_root) {
+               dev->priv.eq_debugfs = debugfs_create_dir("EQs",  dev->priv.dbg_root);
+               if (!dev->priv.eq_debugfs)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/* Remove the "EQs" debugfs directory and everything beneath it. */
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+       /* Nothing was created if the top-level directory does not exist. */
+       if (!mlx5_debugfs_root)
+               return;
+
+       debugfs_remove_recursive(dev->priv.eq_debugfs);
+}
+
+/*
+ * Read handler of the per-command "average" debugfs file.
+ *
+ * Formats stats->sum / stats->n (0 when no sample has been recorded) as a
+ * decimal number followed by a newline and hands it to user space.
+ *
+ * The copy goes through simple_read_from_buffer(), which never writes
+ * more than @count bytes into the user buffer, honours and advances
+ * *@pos, and returns -EFAULT when the user buffer is not writable.
+ */
+static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+                           loff_t *pos)
+{
+       struct mlx5_cmd_stats *stats = filp->private_data;
+       /* 20 digits of a u64, newline and terminating NUL. */
+       char tbuf[22];
+       u64 field = 0;
+       int len;
+
+       /* Take a consistent snapshot of sum and n. */
+       spin_lock(&stats->lock);
+       if (stats->n)
+               field = stats->sum / stats->n;
+       spin_unlock(&stats->lock);
+
+       len = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+
+       return simple_read_from_buffer(buf, count, pos, tbuf, len);
+}
+
+
+/*
+ * Write handler of the per-command "average" debugfs file: any write,
+ * whatever its content, resets the accumulated sum and sample count.
+ * The whole write is reported as consumed.
+ */
+static ssize_t average_write(struct file *filp, const char __user *buf,
+                            size_t count, loff_t *pos)
+{
+       struct mlx5_cmd_stats *stats = filp->private_data;
+
+       spin_lock(&stats->lock);
+       stats->n = 0;
+       stats->sum = 0;
+       spin_unlock(&stats->lock);
+
+       *pos += count;
+
+       return count;
+}
+
+/*
+ * File operations of the per-command "average" debugfs file: reading
+ * yields the current average, writing resets the statistics.
+ */
+static const struct file_operations stats_fops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .read   = average_read,
+       .write  = average_write,
+};
+
+/*
+ * Create the "commands" debugfs directory under the device's debugfs root
+ * with one sub-directory per known command opcode, each holding an
+ * "average" file and an "n" counter.
+ *
+ * Opcodes for which mlx5_command_str() reports "unknown command opcode"
+ * are skipped.
+ *
+ * Returns 0 on success or when debugfs is unavailable, -ENOMEM when any
+ * entry cannot be created.  On failure the whole "commands" tree is
+ * removed and the stored dentry pointer is cleared.
+ */
+int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_stats *stats;
+       struct dentry **cmd;
+       const char *namep;
+       int err;
+       int i;
+
+       if (!mlx5_debugfs_root)
+               return 0;
+
+       cmd = &dev->priv.cmdif_debugfs;
+       *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+       if (!*cmd)
+               return -ENOMEM;
+
+       for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+               stats = &dev->cmd.stats[i];
+               namep = mlx5_command_str(i);
+               if (strcmp(namep, "unknown command opcode")) {
+                       stats->root = debugfs_create_dir(namep, *cmd);
+                       if (!stats->root) {
+                               mlx5_core_warn(dev, "failed adding command %d\n",
+                                              i);
+                               err = -ENOMEM;
+                               goto out;
+                       }
+
+                       stats->avg = debugfs_create_file("average", 0400,
+                                                        stats->root, stats,
+                                                        &stats_fops);
+                       if (!stats->avg) {
+                               mlx5_core_warn(dev, "failed creating debugfs file\n");
+                               err = -ENOMEM;
+                               goto out;
+                       }
+
+                       stats->count = debugfs_create_u64("n", 0400,
+                                                         stats->root,
+                                                         &stats->n);
+                       if (!stats->count) {
+                               mlx5_core_warn(dev, "failed creating debugfs file\n");
+                               err = -ENOMEM;
+                               goto out;
+                       }
+               }
+       }
+
+       return 0;
+out:
+       debugfs_remove_recursive(*cmd);
+       /*
+        * Forget the removed directory so that a later
+        * mlx5_cmdif_debugfs_cleanup() does not remove it a second time.
+        */
+       *cmd = NULL;
+       return err;
+}
+
+/* Remove the "commands" debugfs directory and everything beneath it. */
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+       /* Nothing was created if the top-level directory does not exist. */
+       if (!mlx5_debugfs_root)
+               return;
+
+       debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+}
+
+/*
+ * Create the "CQs" debugfs directory under the device's debugfs root.
+ * Returns 0 on success or when debugfs is unavailable, -ENOMEM when the
+ * directory cannot be created.
+ */
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+{
+       if (mlx5_debugfs_root) {
+               dev->priv.cq_debugfs = debugfs_create_dir("CQs",  dev->priv.dbg_root);
+               if (!dev->priv.cq_debugfs)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/* Remove the "CQs" debugfs directory and everything beneath it. */
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+       /* Nothing was created if the top-level directory does not exist. */
+       if (!mlx5_debugfs_root)
+               return;
+
+       debugfs_remove_recursive(dev->priv.cq_debugfs);
+}
+
+/*
+ * Query firmware for @qp and return the attribute selected by @index
+ * (one of the QP_* indices).
+ *
+ * Returns the decoded attribute, or 0 when the output mailbox cannot be
+ * allocated, the query fails, or @index is not a known attribute.
+ *
+ * The QP context fields are big-endian; multi-byte fields are converted
+ * with be16_to_cpu()/be32_to_cpu() before their bit fields are extracted.
+ */
+static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+                        int index)
+{
+       struct mlx5_query_qp_mbox_out *out;
+       struct mlx5_qp_context *ctx;
+       u64 param = 0;
+       int err;
+       int no_sq;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return param;
+
+       err = mlx5_core_qp_query(dev, qp, out, sizeof(*out));
+       if (err) {
+               mlx5_core_warn(dev, "failed to query qp\n");
+               goto out;
+       }
+
+       ctx = &out->ctx;
+       switch (index) {
+       case QP_PID:
+               param = qp->pid;
+               break;
+       case QP_STATE:
+               param = be32_to_cpu(ctx->flags) >> 28;
+               break;
+       case QP_XPORT:
+               param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
+               break;
+       case QP_MTU:
+               param = ctx->mtu_msgmax >> 5;
+               break;
+       case QP_N_RECV:
+               param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+               break;
+       case QP_RECV_SZ:
+               param = 1 << ((ctx->rq_size_stride & 7) + 4);
+               break;
+       case QP_N_SEND:
+               /* The top bit flags a QP without a send queue. */
+               no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
+               if (!no_sq)
+                       param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
+               else
+                       param = 0;
+               break;
+       case QP_LOG_PG_SZ:
+               param = ((be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f);
+               /* Stored relative to a 4KB page. */
+               param += 12;
+               break;
+       case QP_RQPN:
+               param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+               break;
+       }
+
+out:
+       kfree(out);
+       return param;
+}
+
+/*
+ * Query the firmware context of @eq and decode the attribute selected by
+ * @index (one of the EQ_* field indices).
+ *
+ * Returns the decoded value, or 0 when the output mailbox cannot be
+ * allocated, the query command fails or @index is not handled.
+ */
+static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                        int index)
+{
+       struct mlx5_query_eq_mbox_out *out;
+       struct mlx5_eq_context *ctx;
+       u64 param = 0;
+       int err;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return param;
+
+       ctx = &out->ctx;
+
+       err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+       if (err) {
+               mlx5_core_warn(dev, "failed to query eq\n");
+               goto out;
+       }
+
+       switch (index) {
+       case EQ_NUM_EQES:
+               /*
+                * The log size is masked to 5 bits, so it can be 31.  Shift a
+                * 64-bit one: "1 << 31" overflows a signed int and would be
+                * sign-extended when stored in the u64 result.
+                */
+               param = 1ULL << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+               break;
+       case EQ_INTR:
+               param = ctx->intr;
+               break;
+       case EQ_LOG_PG_SZ:
+               /* Stored relative to 4KB (2^12) */
+               param = (ctx->log_page_size & 0x1f) + 12;
+               break;
+       }
+
+out:
+       kfree(out);
+       return param;
+}
+
+/*
+ * Query the firmware context of @cq and decode the attribute selected by
+ * @index (one of the CQ_* field indices).
+ *
+ * Returns the decoded value, or 0 when the output mailbox cannot be
+ * allocated, the query command fails or @index is not handled.
+ */
+static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                        int index)
+{
+       struct mlx5_query_cq_mbox_out *out;
+       struct mlx5_cq_context *ctx;
+       u64 param = 0;
+       int err;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return param;
+
+       ctx = &out->ctx;
+
+       err = mlx5_core_query_cq(dev, cq, out);
+       if (err) {
+               mlx5_core_warn(dev, "failed to query cq\n");
+               goto out;
+       }
+
+       switch (index) {
+       case CQ_PID:
+               param = cq->pid;
+               break;
+       case CQ_NUM_CQES:
+               /*
+                * The log size is masked to 5 bits, so it can be 31.  Shift a
+                * 64-bit one: "1 << 31" overflows a signed int and would be
+                * sign-extended when stored in the u64 result.
+                */
+               param = 1ULL << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+               break;
+       case CQ_LOG_PG_SZ:
+               /* Stored relative to 4KB (2^12) */
+               param = (ctx->log_pg_sz & 0x1f) + 12;
+               break;
+       }
+
+out:
+       kfree(out);
+       return param;
+}
+
+/*
+ * debugfs read handler: report one attribute of a QP, EQ or CQ as a
+ * hexadecimal number followed by a newline.
+ *
+ * filp->private_data points at one mlx5_field_desc inside the fields[] array
+ * of a struct mlx5_rsc_debug; desc->i is that element's index, which is what
+ * allows stepping back to the containing structure.
+ *
+ * The whole value is produced by the first read; any read at a non-zero
+ * offset reports end of file.
+ *
+ * Returns the number of bytes copied, 0 at end of file, -EINVAL for an
+ * unknown resource type, or -EFAULT when the user buffer cannot be written.
+ */
+static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+                       loff_t *pos)
+{
+       struct mlx5_field_desc *desc;
+       struct mlx5_rsc_debug *d;
+       char tbuf[20];  /* "0x" + 16 hex digits + '\n' + NUL */
+       u64 field;
+       int ret;
+
+       if (*pos)
+               return 0;
+
+       desc = filp->private_data;
+       d = (void *)(desc - desc->i) - sizeof(*d);
+       switch (d->type) {
+       case MLX5_DBG_RSC_QP:
+               field = qp_read_field(d->dev, d->object, desc->i);
+               break;
+
+       case MLX5_DBG_RSC_EQ:
+               field = eq_read_field(d->dev, d->object, desc->i);
+               break;
+
+       case MLX5_DBG_RSC_CQ:
+               field = cq_read_field(d->dev, d->object, desc->i);
+               break;
+
+       default:
+               mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
+               return -EINVAL;
+       }
+
+       /*
+        * scnprintf() returns the number of characters actually stored, so
+        * the length can never exceed what tbuf holds.
+        */
+       ret = scnprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
+       /*
+        * simple_read_from_buffer() limits the copy to @count, advances *pos
+        * and turns a failed copy_to_user() into -EFAULT instead of leaking
+        * the "bytes not copied" remainder to the caller as a return code.
+        */
+       return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+/*
+ * File operations attached by add_res_tree() to every per-resource attribute
+ * file; reads are served by dbg_read().
+ */
+static const struct file_operations fops = {
+       .owner  = THIS_MODULE,
+       .open   = simple_open,
+       .read   = dbg_read,
+};
+
+/*
+ * Create the debugfs directory for one resource and fill it with one
+ * read-only (0400) file per entry of @field.
+ *
+ * The directory is created under @root and named after @rsn printed as
+ * "0x%x".  @type and @data are recorded so that dbg_read() can later find
+ * and query the object.
+ *
+ * On success the new descriptor is stored in *@dbg and 0 is returned.  On
+ * failure everything created so far is removed, *@dbg is left untouched and
+ * -ENOMEM is returned.
+ */
+static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
+                       struct dentry *root, struct mlx5_rsc_debug **dbg,
+                       int rsn, char **field, int nfile, void *data)
+{
+       struct mlx5_rsc_debug *rsc;
+       char name[32];
+       int idx;
+
+       rsc = kzalloc(sizeof(*rsc) + nfile * sizeof(rsc->fields[0]), GFP_KERNEL);
+       if (!rsc)
+               return -ENOMEM;
+
+       rsc->dev = dev;
+       rsc->object = data;
+       rsc->type = type;
+
+       sprintf(name, "0x%x", rsn);
+       rsc->root = debugfs_create_dir(name, root);
+       if (!rsc->root)
+               goto err_free;
+
+       for (idx = 0; idx < nfile; idx++) {
+               rsc->fields[idx].i = idx;
+               rsc->fields[idx].dent = debugfs_create_file(field[idx], 0400,
+                                                           rsc->root,
+                                                           &rsc->fields[idx],
+                                                           &fops);
+               if (!rsc->fields[idx].dent)
+                       goto err_remove;
+       }
+
+       *dbg = rsc;
+       return 0;
+
+err_remove:
+       debugfs_remove_recursive(rsc->root);
+err_free:
+       kfree(rsc);
+       return -ENOMEM;
+}
+
+/*
+ * Remove the debugfs directory of a resource, including every file in it,
+ * and free the descriptor itself.
+ */
+static void rem_res_tree(struct mlx5_rsc_debug *d)
+{
+       debugfs_remove_recursive(d->root);
+       kfree(d);
+}
+
+/*
+ * Expose @qp under the device's QP debugfs directory, one file per entry of
+ * qp_fields.  Returns 0 without doing anything while mlx5_debugfs_root is
+ * NULL; on failure qp->dbg is reset to NULL and the error is returned.
+ */
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+       int ret = 0;
+
+       if (mlx5_debugfs_root) {
+               ret = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+                                  &qp->dbg, qp->qpn, qp_fields,
+                                  ARRAY_SIZE(qp_fields), qp);
+               if (ret)
+                       qp->dbg = NULL;
+       }
+
+       return ret;
+}
+
+/*
+ * Drop the debugfs entries of @qp, if debugfs is in use and the QP has any.
+ */
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+       if (mlx5_debugfs_root && qp->dbg)
+               rem_res_tree(qp->dbg);
+}
+
+
+/*
+ * Expose @eq under the device's EQ debugfs directory, one file per entry of
+ * eq_fields.  Returns 0 without doing anything while mlx5_debugfs_root is
+ * NULL; on failure eq->dbg is reset to NULL and the error is returned.
+ */
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       int ret = 0;
+
+       if (mlx5_debugfs_root) {
+               ret = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+                                  &eq->dbg, eq->eqn, eq_fields,
+                                  ARRAY_SIZE(eq_fields), eq);
+               if (ret)
+                       eq->dbg = NULL;
+       }
+
+       return ret;
+}
+
+/*
+ * Drop the debugfs entries of @eq, if debugfs is in use and the EQ has any.
+ */
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       if (mlx5_debugfs_root && eq->dbg)
+               rem_res_tree(eq->dbg);
+}
+
+/*
+ * Expose @cq under the device's CQ debugfs directory, one file per entry of
+ * cq_fields.  Returns 0 without doing anything while mlx5_debugfs_root is
+ * NULL; on failure cq->dbg is reset to NULL and the error is returned.
+ */
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+       int ret = 0;
+
+       if (mlx5_debugfs_root) {
+               ret = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+                                  &cq->dbg, cq->cqn, cq_fields,
+                                  ARRAY_SIZE(cq_fields), cq);
+               if (ret)
+                       cq->dbg = NULL;
+       }
+
+       return ret;
+}
+
+/*
+ * Drop the debugfs entries of @cq, if debugfs is in use and the CQ has any.
+ */
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+       if (mlx5_debugfs_root && cq->dbg)
+               rem_res_tree(cq->dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644 (file)
index 0000000..c02cbcf
--- /dev/null
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Size of one EQ entry and the owner value init_eq_buf() stamps on each one */
+enum {
+       MLX5_EQE_SIZE           = sizeof(struct mlx5_eqe),
+       MLX5_EQE_OWNER_INIT_VAL = 0x1,
+};
+
+/* NOTE(review): EQ state encodings; not referenced in the visible part of this file */
+enum {
+       MLX5_EQ_STATE_ARMED             = 0x9,
+       MLX5_EQ_STATE_FIRED             = 0xa,
+       MLX5_EQ_STATE_ALWAYS_ARMED      = 0xb,
+};
+
+/*
+ * Queue sizing: spare entries added to every EQ by mlx5_create_map_eq(), and
+ * the depths requested for the async and command EQs by mlx5_start_eqs().
+ */
+enum {
+       MLX5_NUM_SPARE_EQE      = 0x80,
+       MLX5_NUM_ASYNC_EQE      = 0x100,
+       MLX5_NUM_CMD_EQE        = 32,
+};
+
+/* Offset of the EQ doorbell from the start of the UAR mapping (uar->map) */
+enum {
+       MLX5_EQ_DOORBEL_OFFSET  = 0x40,
+};
+
+/*
+ * Bitmask of the event types delivered to the asynchronous EQ; passed as the
+ * event mask when mlx5_start_eqs() creates that EQ.
+ */
+#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)          | \
+                              (1ull << MLX5_EVENT_TYPE_COMM_EST)           | \
+                              (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)         | \
+                              (1ull << MLX5_EVENT_TYPE_CQ_ERROR)           | \
+                              (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)     | \
+                              (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
+                              (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+                              (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
+                              (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)        | \
+                              (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
+                              (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
+                              (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
+
+/* NOTE(review): not referenced in the visible part of this file -- confirm it is still needed */
+struct map_eq_in {
+       u64     mask;
+       u32     reserved;
+       u32     unmap_eqn;
+};
+
+/* NOTE(review): not referenced in the visible part of this file -- confirm it is still needed */
+struct cre_des_eq {
+       u8      reserved[15];
+       u8      eqn;
+};
+
+/*
+ * Issue the DESTROY_EQ command for EQ number @eqn.
+ *
+ * Returns 0 on success, the error from mlx5_cmd_exec() when the command
+ * could not be executed, or the translated firmware status when the device
+ * rejected it.
+ */
+static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
+{
+       struct mlx5_destroy_eq_mbox_in in;
+       struct mlx5_destroy_eq_mbox_out out;
+       int err;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
+       in.eqn = eqn;
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err)
+               return err;
+
+       /*
+        * The output mailbox only carries a meaningful status once the
+        * command itself went through, so it is checked on success only.
+        */
+       if (out.hdr.status)
+               err = mlx5_cmd_status_to_err(&out.hdr);
+
+       return err;
+}
+
+/* Return the address of entry number @entry inside the buffer of @eq */
+static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+       return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+/*
+ * Return the entry at the current consumer index, or NULL when it is not
+ * ready for software yet.
+ *
+ * The slot is cons_index modulo nent (nent is a power of two).  An entry is
+ * accepted when the low bit of its owner field equals the wrap parity of the
+ * consumer index, i.e. whether (cons_index & nent) is set.
+ */
+static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+       struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+       return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+/*
+ * Map an EQ entry type to the name of its MLX5_EVENT_TYPE_* constant for log
+ * messages.  Types without a table entry yield "Unrecognized event".
+ */
+static const char *eqe_type_str(u8 type)
+{
+       static const char * const names[] = {
+               [MLX5_EVENT_TYPE_COMP] = "MLX5_EVENT_TYPE_COMP",
+               [MLX5_EVENT_TYPE_PATH_MIG] = "MLX5_EVENT_TYPE_PATH_MIG",
+               [MLX5_EVENT_TYPE_COMM_EST] = "MLX5_EVENT_TYPE_COMM_EST",
+               [MLX5_EVENT_TYPE_SQ_DRAINED] = "MLX5_EVENT_TYPE_SQ_DRAINED",
+               [MLX5_EVENT_TYPE_SRQ_LAST_WQE] = "MLX5_EVENT_TYPE_SRQ_LAST_WQE",
+               [MLX5_EVENT_TYPE_SRQ_RQ_LIMIT] = "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT",
+               [MLX5_EVENT_TYPE_CQ_ERROR] = "MLX5_EVENT_TYPE_CQ_ERROR",
+               [MLX5_EVENT_TYPE_WQ_CATAS_ERROR] = "MLX5_EVENT_TYPE_WQ_CATAS_ERROR",
+               [MLX5_EVENT_TYPE_PATH_MIG_FAILED] = "MLX5_EVENT_TYPE_PATH_MIG_FAILED",
+               [MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR] = "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR",
+               [MLX5_EVENT_TYPE_WQ_ACCESS_ERROR] = "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR",
+               [MLX5_EVENT_TYPE_SRQ_CATAS_ERROR] = "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR",
+               [MLX5_EVENT_TYPE_INTERNAL_ERROR] = "MLX5_EVENT_TYPE_INTERNAL_ERROR",
+               [MLX5_EVENT_TYPE_PORT_CHANGE] = "MLX5_EVENT_TYPE_PORT_CHANGE",
+               [MLX5_EVENT_TYPE_GPIO_EVENT] = "MLX5_EVENT_TYPE_GPIO_EVENT",
+               [MLX5_EVENT_TYPE_REMOTE_CONFIG] = "MLX5_EVENT_TYPE_REMOTE_CONFIG",
+               [MLX5_EVENT_TYPE_DB_BF_CONGESTION] = "MLX5_EVENT_TYPE_DB_BF_CONGESTION",
+               [MLX5_EVENT_TYPE_STALL_EVENT] = "MLX5_EVENT_TYPE_STALL_EVENT",
+               [MLX5_EVENT_TYPE_CMD] = "MLX5_EVENT_TYPE_CMD",
+               [MLX5_EVENT_TYPE_PAGE_REQUEST] = "MLX5_EVENT_TYPE_PAGE_REQUEST",
+       };
+
+       /* Gaps between the designated entries are NULL */
+       if (type < ARRAY_SIZE(names) && names[type])
+               return names[type];
+
+       return "Unrecognized event";
+}
+
+/*
+ * Translate the sub-type of a port change EQ entry into the device event
+ * reported through dev->event().  Unknown sub-types yield -1.
+ */
+static enum mlx5_dev_event port_subtype_event(u8 subtype)
+{
+       switch (subtype) {
+       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+               return MLX5_DEV_EVENT_PORT_DOWN;
+       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+               return MLX5_DEV_EVENT_PORT_UP;
+       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+               return MLX5_DEV_EVENT_PORT_INITIALIZED;
+       case MLX5_PORT_CHANGE_SUBTYPE_LID:
+               return MLX5_DEV_EVENT_LID_CHANGE;
+       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+               return MLX5_DEV_EVENT_PKEY_CHANGE;
+       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+               return MLX5_DEV_EVENT_GUID_CHANGE;
+       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+               return MLX5_DEV_EVENT_CLIENT_REREG;
+       default:
+               return -1;
+       }
+}
+
+/*
+ * Write the consumer index of @eq to its doorbell.
+ *
+ * The value is the low 24 bits of cons_index with the EQ number in the top
+ * byte, stored big endian.  A non-zero @arm selects the doorbell word at
+ * offset 0, otherwise the word two __be32 slots further on is written.
+ * NOTE(review): presumably the first word also re-arms the EQ while the
+ * second only updates the index -- confirm against the device documentation.
+ */
+static void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+       u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+       __raw_writel((__force u32) cpu_to_be32(val), addr);
+       /* We still want ordering, just not swabbing, so add a barrier */
+       mb();
+}
+
+/*
+ * Drain @eq: handle every entry that next_eqe_sw() hands out, then write the
+ * consumer index back with the arm flag set.
+ *
+ * Each entry is dispatched on its type to the CQ completion, QP, SRQ,
+ * command completion, port change, CQ error or page request handler; any
+ * other type is logged as unhandled.  The consumer index is additionally
+ * written (without arming) after every MLX5_NUM_SPARE_EQE processed entries.
+ *
+ * Returns 1 if at least one entry was processed, 0 otherwise.
+ */
+static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       struct mlx5_eqe *eqe;
+       int eqes_found = 0;
+       int set_ci = 0;
+       u32 cqn;
+       u32 srqn;
+       u8 port;
+
+       while ((eqe = next_eqe_sw(eq))) {
+               /*
+                * Make sure we read EQ entry contents after we've
+                * checked the ownership bit.
+                */
+               rmb();
+
+               mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type));
+               switch (eqe->type) {
+               case MLX5_EVENT_TYPE_COMP:
+                       cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+                       mlx5_cq_completion(dev, cqn);
+                       break;
+
+               case MLX5_EVENT_TYPE_PATH_MIG:
+               case MLX5_EVENT_TYPE_COMM_EST:
+               case MLX5_EVENT_TYPE_SQ_DRAINED:
+               case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+               case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+               case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+               case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+               case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+                       mlx5_core_dbg(dev, "event %s(%d) arrived\n",
+                                     eqe_type_str(eqe->type), eqe->type);
+                       mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff,
+                                     eqe->type);
+                       break;
+
+               case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+               case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+                       srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+                       mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
+                                     eqe_type_str(eqe->type), eqe->type, srqn);
+                       mlx5_srq_event(dev, srqn, eqe->type);
+                       break;
+
+               case MLX5_EVENT_TYPE_CMD:
+                       mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+                       break;
+
+               case MLX5_EVENT_TYPE_PORT_CHANGE:
+                       /* The port number is carried in the high nibble */
+                       port = (eqe->data.port.port >> 4) & 0xf;
+                       switch (eqe->sub_type) {
+                       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+                       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+                       case MLX5_PORT_CHANGE_SUBTYPE_LID:
+                       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+                       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+                       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+                       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+                               dev->event(dev, port_subtype_event(eqe->sub_type), &port);
+                               break;
+                       default:
+                               mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
+                                              port, eqe->sub_type);
+                       }
+                       break;
+               case MLX5_EVENT_TYPE_CQ_ERROR:
+                       cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+                                      cqn, eqe->data.cq_err.syndrome);
+                       mlx5_cq_event(dev, cqn, eqe->type);
+                       break;
+
+               case MLX5_EVENT_TYPE_PAGE_REQUEST:
+                       {
+                               u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+                               s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
+
+                               mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
+                               mlx5_core_req_pages_handler(dev, func_id, npages);
+                       }
+                       break;
+
+
+               default:
+                       mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn);
+                       break;
+               }
+
+               ++eq->cons_index;
+               eqes_found = 1;
+               ++set_ci;
+
+               /* The HCA will think the queue has overflowed if we
+                * don't tell it we've been processing events.  We
+                * create our EQs with MLX5_NUM_SPARE_EQE extra
+                * entries, so we must update our consumer index at
+                * least that often.
+                */
+               if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+                       eq_update_ci(eq, 0);
+                       set_ci = 0;
+               }
+       }
+
+       /* Final update: publish the consumer index with the arm flag set */
+       eq_update_ci(eq, 1);
+
+       return eqes_found;
+}
+
+/*
+ * MSI-X interrupt handler; @eq_ptr is the struct mlx5_eq the vector was
+ * requested for.  The EQ is drained and the interrupt always reported as
+ * handled.
+ */
+static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
+{
+       struct mlx5_eq *eq = eq_ptr;
+
+       mlx5_eq_int(eq->dev, eq);
+
+       /* MSI-X vectors always belong to us */
+       return IRQ_HANDLED;
+}
+
+/* Stamp every entry of @eq with the initial owner value */
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+       int idx;
+
+       for (idx = 0; idx < eq->nent; idx++)
+               get_eqe(eq, idx)->owner = MLX5_EQE_OWNER_INIT_VAL;
+}
+
+/*
+ * Create an event queue in firmware and hook it to MSI-X vector @vecidx.
+ *
+ * @nent is the number of entries the caller needs; MLX5_NUM_SPARE_EQE is
+ * added to it and the sum rounded up to a power of two.  @mask selects the
+ * event types reported through the EQ, @name labels the interrupt and @uar
+ * provides both the UAR index given to firmware and the doorbell mapping.
+ *
+ * Returns 0 on success or an error code; on failure everything acquired
+ * here (buffer, firmware EQ, interrupt) has been released again.
+ */
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+                      int nent, u64 mask, const char *name, struct mlx5_uar *uar)
+{
+       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       struct mlx5_create_eq_mbox_in *in;
+       struct mlx5_create_eq_mbox_out out;
+       int err;
+       int inlen;
+
+       eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+       err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
+                            &eq->buf);
+       if (err)
+               return err;
+
+       init_eq_buf(eq);
+
+       /* The command carries one physical address per buffer page */
+       inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_buf;
+       }
+       memset(&out, 0, sizeof(out));
+
+       mlx5_fill_page_array(&eq->buf, in->pas);
+
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+       in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+       in->ctx.intr = vecidx;
+       in->ctx.log_page_size = PAGE_SHIFT - 12;
+       in->events_mask = cpu_to_be64(mask);
+
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       if (err)
+               goto err_in;
+
+       if (out.hdr.status) {
+               err = mlx5_cmd_status_to_err(&out.hdr);
+               goto err_in;
+       }
+
+       /*
+        * The interrupt handler dereferences eq->dev and rings eq->doorbell,
+        * and it can run as soon as the vector is registered, so the EQ has
+        * to be completely set up before request_irq() is called.
+        */
+       eq->eqn = out.eq_number;
+       eq->irqn = vecidx;
+       eq->dev = dev;
+       eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
+
+       err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
+                         name, eq);
+       if (err)
+               goto err_eq;
+
+       err = mlx5_debug_eq_add(dev, eq);
+       if (err)
+               goto err_irq;
+
+       /* EQs are created in ARMED state
+        */
+       eq_update_ci(eq, 1);
+
+       mlx5_vfree(in);
+       return 0;
+
+err_irq:
+       free_irq(table->msix_arr[vecidx].vector, eq);
+
+err_eq:
+       mlx5_cmd_destroy_eq(dev, eq->eqn);
+
+err_in:
+       mlx5_vfree(in);
+
+err_buf:
+       mlx5_buf_free(dev, &eq->buf);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
+
+/*
+ * Undo mlx5_create_map_eq(): remove the debugfs entries, release the
+ * interrupt, destroy the EQ in firmware and free its buffer.
+ *
+ * The buffer is freed even when the destroy command fails; that failure is
+ * logged and returned to the caller.
+ */
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       int ret;
+
+       mlx5_debug_eq_remove(dev, eq);
+       free_irq(dev->priv.eq_table.msix_arr[eq->irqn].vector, eq);
+
+       ret = mlx5_cmd_destroy_eq(dev, eq->eqn);
+       if (ret)
+               mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
+                              eq->eqn);
+
+       mlx5_buf_free(dev, &eq->buf);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+/*
+ * Initialise the EQ table lock and set up the EQ debugfs state.  Returns the
+ * result of mlx5_eq_debugfs_init().
+ */
+int mlx5_eq_init(struct mlx5_core_dev *dev)
+{
+       spin_lock_init(&dev->priv.eq_table.lock);
+
+       return mlx5_eq_debugfs_init(dev);
+}
+
+
+/* Counterpart of mlx5_eq_init(): tears down the EQ debugfs state */
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+{
+       mlx5_eq_debugfs_cleanup(dev);
+}
+
+/*
+ * Create the command, asynchronous and page request EQs, in that order, all
+ * on the first UAR of the device.
+ *
+ * mlx5_cmd_use_events() is called once the command EQ exists.  If a later
+ * EQ cannot be created, the ones already created are destroyed again and
+ * mlx5_cmd_use_polling() is called before the command EQ goes away.
+ *
+ * Returns 0 on success or the error of the failing creation.
+ */
+int mlx5_start_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       int err;
+
+       err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+                                MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
+                                "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
+       if (err) {
+               mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+               return err;
+       }
+
+       mlx5_cmd_use_events(dev);
+
+       err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
+                                MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+                                "mlx5_async_eq", &dev->priv.uuari.uars[0]);
+       if (err) {
+               mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+               goto err1;
+       }
+
+       /*
+        * The event mask is a u64, so build it with a 64-bit one like every
+        * other mask in this file instead of relying on an int shift.
+        */
+       err = mlx5_create_map_eq(dev, &table->pages_eq,
+                                MLX5_EQ_VEC_PAGES,
+                                dev->caps.max_vf + 1,
+                                1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
+                                &dev->priv.uuari.uars[0]);
+       if (err) {
+               mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+               goto err2;
+       }
+
+       return err;
+
+err2:
+       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+
+err1:
+       mlx5_cmd_use_polling(dev);
+       mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+       return err;
+}
+
+/*
+ * Destroy the page request, asynchronous and command EQs, in that order.
+ *
+ * A failure to destroy the page request EQ aborts the sequence.  The result
+ * of destroying the asynchronous EQ is not checked.  mlx5_cmd_use_polling()
+ * is called before the command EQ is destroyed, and mlx5_cmd_use_events() is
+ * called again if that destruction fails.
+ *
+ * Returns 0 on success or the error of the page request / command EQ
+ * destruction.
+ */
+int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *eqs = &dev->priv.eq_table;
+       int ret;
+
+       ret = mlx5_destroy_unmap_eq(dev, &eqs->pages_eq);
+       if (ret)
+               return ret;
+
+       mlx5_destroy_unmap_eq(dev, &eqs->async_eq);
+
+       mlx5_cmd_use_polling(dev);
+       ret = mlx5_destroy_unmap_eq(dev, &eqs->cmd_eq);
+       if (ret)
+               mlx5_cmd_use_events(dev);
+
+       return ret;
+}
+
+/*
+ * Run QUERY_EQ for @eq.  @out is cleared over @outlen bytes and then
+ * receives the command output.
+ *
+ * Returns 0 on success, the error from mlx5_cmd_exec(), or the translated
+ * firmware status.
+ */
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                      struct mlx5_query_eq_mbox_out *out, int outlen)
+{
+       struct mlx5_query_eq_mbox_in in;
+       int ret;
+
+       memset(&in, 0, sizeof(in));
+       memset(out, 0, outlen);
+
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
+       in.eqn = eq->eqn;
+
+       ret = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+       if (!ret && out->hdr.status)
+               ret = mlx5_cmd_status_to_err(&out->hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644 (file)
index 0000000..72a5222
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/module.h>
+#include "mlx5_core.h"
+
+/*
+ * Run QUERY_ADAPTER and copy the returned vsd_psid field into
+ * dev->board_id.
+ *
+ * Returns 0 on success, -ENOMEM if the output mailbox cannot be allocated,
+ * the error from mlx5_cmd_exec(), or the translated firmware status.
+ */
+int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_query_adapter_mbox_in in;
+       struct mlx5_cmd_query_adapter_mbox_out *out;
+       int ret;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return -ENOMEM;
+
+       memset(&in, 0, sizeof(in));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER);
+
+       ret = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+       if (ret)
+               goto free_out;
+
+       /* The board id is only taken over when firmware reported no status */
+       if (out->hdr.status)
+               ret = mlx5_cmd_status_to_err(&out->hdr);
+       else
+               memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid));
+
+free_out:
+       kfree(out);
+
+       return ret;
+}
+
+/*
+ * Run QUERY_HCA_CAP (opmod 0x1) followed by QUERY_SPECIAL_CONTEXTS and
+ * store the decoded device limits in @caps.
+ *
+ * Returns 0 on success, -ENOMEM if the output mailbox cannot be allocated,
+ * -EINVAL if the device reports more than MLX5_MAX_PORTS ports, or the error
+ * from command execution / firmware status.  @caps may already be partially
+ * filled in when an error is returned.
+ */
+int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
+                          struct mlx5_caps *caps)
+{
+       struct mlx5_cmd_query_hca_cap_mbox_out *out;
+       struct mlx5_cmd_query_hca_cap_mbox_in in;
+       struct mlx5_query_special_ctxs_mbox_out ctx_out;
+       struct mlx5_query_special_ctxs_mbox_in ctx_in;
+       int err;
+       u16 t16;
+
+       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       if (!out)
+               return -ENOMEM;
+
+       memset(&in, 0, sizeof(in));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+       in.hdr.opmod  = cpu_to_be16(0x1);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+       if (err)
+               goto out_out;
+
+       if (out->hdr.status) {
+               err = mlx5_cmd_status_to_err(&out->hdr);
+               goto out_out;
+       }
+
+
+       /* Fields named log_* hold base-2 logarithms; sizes are expanded here */
+       caps->log_max_eq = out->hca_cap.log_max_eq & 0xf;
+       caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
+       caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
+       caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
+       caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
+       caps->flags = be64_to_cpu(out->hca_cap.flags);
+       caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
+       caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
+       caps->num_ports = out->hca_cap.num_ports & 0xf;
+       caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
+       if (caps->num_ports > MLX5_MAX_PORTS) {
+               mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n",
+                             caps->num_ports, MLX5_MAX_PORTS);
+               err = -EINVAL;
+               goto out_out;
+       }
+       caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
+       caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
+       caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
+       caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
+       caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
+       caps->log_max_mcg = out->hca_cap.log_max_mcg;
+       caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
+       caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
+       caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
+       caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
+       /* Bit 15 gates the blue flame fields; the low 5 bits are a log size */
+       t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
+       if (t16 & 0x8000) {
+               caps->bf_reg_size = 1 << (t16 & 0x1f);
+               caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
+       } else {
+               caps->bf_reg_size = 0;
+               caps->bf_regs_per_page = 0;
+       }
+       /* Stored as a mask with the low log_pg_sz bits cleared, not as a size */
+       caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1);
+
+       memset(&ctx_in, 0, sizeof(ctx_in));
+       memset(&ctx_out, 0, sizeof(ctx_out));
+       ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+       err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in),
+                                &ctx_out, sizeof(ctx_out));
+       if (err)
+               goto out_out;
+
+       if (ctx_out.hdr.status)
+               err = mlx5_cmd_status_to_err(&ctx_out.hdr);
+
+       /* NOTE(review): stored even when the status check above set err */
+       caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey);
+
+out_out:
+       kfree(out);
+
+       return err;
+}
+
+/*
+ * Run the INIT_HCA command.
+ *
+ * Returns 0 on success, the error from mlx5_cmd_exec(), or the translated
+ * firmware status.
+ */
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_init_hca_mbox_in in;
+       struct mlx5_cmd_init_hca_mbox_out out;
+       int ret;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
+
+       ret = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (!ret && out.hdr.status)
+               ret = mlx5_cmd_status_to_err(&out.hdr);
+
+       return ret;
+}
+
+/*
+ * Run the TEARDOWN_HCA command.
+ *
+ * Returns 0 on success, the error from mlx5_cmd_exec(), or the translated
+ * firmware status.
+ */
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_teardown_hca_mbox_in in;
+       struct mlx5_cmd_teardown_hca_mbox_out out;
+       int ret;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
+
+       ret = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (!ret && out.hdr.status)
+               ret = mlx5_cmd_status_to_err(&out.hdr);
+
+       return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644 (file)
index 0000000..ea4b9bc
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Tunables used by poll_health(). */
+enum {
+       /* Base delay, in jiffies, between two polls (a random delay below HZ is added). */
+       MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
+       /* Consecutive polls with an unchanged health counter before the device is reported. */
+       MAX_MISSES                      = 3,
+};
+
+/*
+ * Values of the health buffer's synd field that hsynd_str() knows how to
+ * describe; any other value is printed as "unrecognized error".
+ */
+enum {
+       MLX5_HEALTH_SYNDR_FW_ERR                = 0x1,
+       MLX5_HEALTH_SYNDR_IRISC_ERR             = 0x7,
+       MLX5_HEALTH_SYNDR_CRC_ERR               = 0x9,
+       MLX5_HEALTH_SYNDR_FETCH_PCI_ERR         = 0xa,
+       MLX5_HEALTH_SYNDR_HW_FTL_ERR            = 0xb,
+       MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR  = 0xc,
+       MLX5_HEALTH_SYNDR_EQ_ERR                = 0xd,
+       MLX5_HEALTH_SYNDR_FFSER_ERR             = 0xf,
+};
+
+/* Taken around every access to health_list and reg_handler in this file. */
+static DEFINE_SPINLOCK(health_lock);
+
+/* Devices queued by poll_health() and waiting to be handled by health_care(). */
+static LIST_HEAD(health_list);
+/* Work item running health_care(); set up in mlx5_health_init(). */
+static struct work_struct health_work;
+
+/* The single registered health report handler, or NULL when none is registered. */
+static health_handler_t reg_handler;
+/*
+ * Install @handler as the health report handler called from health_care().
+ *
+ * Only one handler can be registered at a time: returns -EEXIST if one is
+ * already installed, 0 otherwise.
+ */
+int mlx5_register_health_report_handler(health_handler_t handler)
+{
+       int ret = 0;
+
+       spin_lock_irq(&health_lock);
+       if (reg_handler)
+               ret = -EEXIST;
+       else
+               reg_handler = handler;
+       spin_unlock_irq(&health_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_register_health_report_handler);
+
+/*
+ * Remove whatever health report handler is currently registered.
+ * Clearing is done under health_lock, the same lock health_care() holds
+ * while it calls the handler.
+ */
+void mlx5_unregister_health_report_handler(void)
+{
+       spin_lock_irq(&health_lock);
+       reg_handler = NULL;
+       spin_unlock_irq(&health_lock);
+}
+EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
+
+/*
+ * Work handler for health_work.
+ *
+ * Moves every entry currently on health_list to a private list, then for
+ * each one logs a warning, calls the registered health report handler (if
+ * any) with the device's PCI device and health buffer, and unlinks the
+ * entry.  The handler is called with health_lock held and interrupts
+ * disabled.
+ */
+static void health_care(struct work_struct *work)
+{
+       struct mlx5_core_health *health, *n;
+       struct mlx5_core_dev *dev;
+       struct mlx5_priv *priv;
+       LIST_HEAD(tlist);
+
+       /* Grab all pending entries in one go so new ones can keep queueing. */
+       spin_lock_irq(&health_lock);
+       list_splice_init(&health_list, &tlist);
+       spin_unlock_irq(&health_lock);
+
+       list_for_each_entry_safe(health, n, &tlist, list) {
+               priv = container_of(health, struct mlx5_priv, health);
+               dev = container_of(priv, struct mlx5_core_dev, priv);
+               mlx5_core_warn(dev, "handling bad device here\n");
+               spin_lock_irq(&health_lock);
+               /*
+                * health->health is a pointer to the health buffer, so the
+                * length reported to the handler must be the size of the
+                * buffer it points to, not the size of the pointer.
+                */
+               if (reg_handler)
+                       reg_handler(dev->pdev, health->health,
+                                   sizeof(*health->health));
+
+               list_del_init(&health->list);
+               spin_unlock_irq(&health_lock);
+       }
+}
+
+/*
+ * Map a health syndrome value to a short human-readable description.
+ *
+ * The returned strings carry no trailing newline; the caller supplies it
+ * in its format string.  Unknown values yield "unrecognized error".
+ */
+static const char *hsynd_str(u8 synd)
+{
+       switch (synd) {
+       case MLX5_HEALTH_SYNDR_FW_ERR:
+               return "firmware internal error";
+       case MLX5_HEALTH_SYNDR_IRISC_ERR:
+               return "irisc not responding";
+       case MLX5_HEALTH_SYNDR_CRC_ERR:
+               return "firmware CRC error";
+       case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
+               return "ICM fetch PCI error";
+       case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
+               return "HW fatal error";
+       case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
+               return "async EQ buffer overrun";
+       case MLX5_HEALTH_SYNDR_EQ_ERR:
+               return "EQ error";
+       case MLX5_HEALTH_SYNDR_FFSER_ERR:
+               return "FFSER error";
+       default:
+               return "unrecognized error";
+       }
+}
+
+/*
+ * Dump the contents of the device's health buffer to the kernel log.
+ *
+ * The buffer is an __iomem mapping, so every field is fetched with the
+ * ioread*() accessors (as poll_health() does for the health counter)
+ * rather than by dereferencing the pointer directly.
+ */
+static void print_health_info(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *health = &dev->priv.health;
+       struct health_buffer __iomem *h = health->health;
+       u8 synd;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
+               pr_info("assert_var[%d] 0x%08x\n", i,
+                       ioread32be(&h->assert_var[i]));
+
+       pr_info("assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+       pr_info("assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+       pr_info("fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
+       pr_info("hw_id 0x%08x\n", ioread32be(&h->hw_id));
+       pr_info("irisc_index %d\n", ioread8(&h->irisc_index));
+       /* Read the syndrome once so the value and its description agree. */
+       synd = ioread8(&h->synd);
+       pr_info("synd 0x%x: %s\n", synd, hsynd_str(synd));
+       pr_info("ext_sync 0x%04x\n", ioread16be(&h->ext_sync));
+}
+
+/*
+ * Timer callback: sample the device health counter.
+ *
+ * @data is the struct mlx5_core_dev pointer stored by
+ * mlx5_start_health_poll().  A sample equal to the previous one counts as
+ * a miss; any change resets the miss count.  When the count reaches
+ * MAX_MISSES the health buffer is dumped, the device is queued on
+ * health_list and health_work is scheduled; the timer is not re-armed in
+ * that case.  Otherwise the timer is re-armed MLX5_HEALTH_POLL_INTERVAL
+ * plus a random delay below HZ from now.
+ */
+static void poll_health(unsigned long data)
+{
+       struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
+       struct mlx5_core_health *health = &dev->priv.health;
+       unsigned long jitter;
+       u32 sample;
+
+       sample = ioread32be(health->health_counter);
+       if (sample != health->prev)
+               health->miss_counter = 0;
+       else
+               ++health->miss_counter;
+       health->prev = sample;
+
+       if (health->miss_counter != MAX_MISSES) {
+               get_random_bytes(&jitter, sizeof(jitter));
+               jitter %= HZ;
+               jitter += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+               mod_timer(&health->timer, jitter);
+               return;
+       }
+
+       mlx5_core_err(dev, "device's health compromised\n");
+       print_health_info(dev);
+
+       spin_lock_irq(&health_lock);
+       list_add_tail(&health->list, &health_list);
+       spin_unlock_irq(&health_lock);
+
+       queue_work(mlx5_core_wq, &health_work);
+}
+
+/*
+ * Start periodic health polling for @dev.
+ *
+ * Points the health state at the health buffer and health counter inside
+ * the device's initialization segment, then arms a timer that runs
+ * poll_health() with @dev as its argument, first expiring about
+ * MLX5_HEALTH_POLL_INTERVAL from now (rounded by round_jiffies()).
+ */
+void mlx5_start_health_poll(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *h = &dev->priv.health;
+       struct timer_list *timer = &h->timer;
+
+       INIT_LIST_HEAD(&h->list);
+       h->health = &dev->iseg->health;
+       h->health_counter = &dev->iseg->health_counter;
+
+       init_timer(timer);
+       timer->function = poll_health;
+       timer->data = (unsigned long)dev;
+       timer->expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+       add_timer(timer);
+}
+
+/*
+ * Stop health polling for @dev: delete the poll timer synchronously, then
+ * take the device off whatever list its health entry is linked on, if any.
+ */
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *h = &dev->priv.health;
+       struct list_head *entry = &h->list;
+
+       del_timer_sync(&h->timer);
+
+       spin_lock_irq(&health_lock);
+       if (!list_empty(entry))
+               list_del_init(entry);
+       spin_unlock_irq(&health_lock);
+}
+
+/* Counterpart of mlx5_health_init(); there is currently nothing to undo. */
+void mlx5_health_cleanup(void)
+{
+       /* intentionally empty */
+}
+
+/* One-time setup of the health code: bind health_work to health_care(). */
+void __init mlx5_health_init(void)
+{
+       INIT_WORK(&health_work, health_care);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
new file mode 100644 (file)
index 0000000..18d6fd5
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/*
+ * Execute a MLX5_CMD_OP_MAD_IFC command.
+ *
+ * @inb:   request payload; sizeof(in->data) bytes are copied from it
+ * @outb:  response buffer; sizeof(out->data) bytes are copied into it, and
+ *         only when the command executed and reported a zero status
+ * @opmod: value placed in the command header's opmod field
+ * @port:  value placed in the command's port field
+ *
+ * Returns 0 on success, -ENOMEM if a mailbox could not be allocated, the
+ * error from mlx5_cmd_exec(), or the translated command status.
+ */
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
+                     u16 opmod, int port)
+{
+       struct mlx5_mad_ifc_mbox_out *reply;
+       struct mlx5_mad_ifc_mbox_in *req;
+       int err;
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       reply = kzalloc(sizeof(*reply), GFP_KERNEL);
+       if (!reply) {
+               err = -ENOMEM;
+               goto free_req;
+       }
+
+       req->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
+       req->hdr.opmod = cpu_to_be16(opmod);
+       req->port = port;
+       memcpy(req->data, inb, sizeof(req->data));
+
+       err = mlx5_cmd_exec(dev, req, sizeof(*req), reply, sizeof(*reply));
+       if (err)
+               goto free_reply;
+
+       if (reply->hdr.status)
+               err = mlx5_cmd_status_to_err(&reply->hdr);
+       else
+               memcpy(outb, reply->data, sizeof(reply->data));
+
+free_reply:
+       kfree(reply);
+free_req:
+       kfree(req);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644 (file)
index 0000000..f21cc39
--- /dev/null
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/debugfs.h>
+#include "mlx5_core.h"
+
+/* DRIVER_NAME is the owner name passed to pci_request_regions() in request_bar(). */
+#define DRIVER_NAME "mlx5_core"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_RELDATE "June 2013"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+/*
+ * Runtime debug mask, exposed as the "debug_mask" module parameter
+ * (mode 0644) and tested by the mlx5_core_dbg_mask() macro.
+ */
+int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
+MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
+
+/* Single-threaded workqueue created in init(); the health code queues health_work on it. */
+struct workqueue_struct *mlx5_core_wq;
+
+/*
+ * Configure the DMA masks for @pdev.
+ *
+ * Both the streaming and the consistent mask are first tried at 64 bits
+ * and fall back to 32 bits with a warning.  If neither width is accepted
+ * for a mask the error is returned.  On success the maximum DMA segment
+ * size is set to 2GB and 0 is returned.
+ */
+static int set_dma_caps(struct pci_dev *pdev)
+{
+       int err;
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err) {
+               dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       }
+       if (err) {
+               dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+               return err;
+       }
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err) {
+               dev_warn(&pdev->dev,
+                        "Warning: couldn't set 64-bit consistent PCI DMA mask.\n");
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       }
+       if (err) {
+               dev_err(&pdev->dev,
+                       "Can't set consistent PCI DMA mask, aborting.\n");
+               return err;
+       }
+
+       dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
+       return 0;
+}
+
+/*
+ * Check that BAR 0 of @pdev is a memory resource and reserve the device's
+ * PCI regions under DRIVER_NAME.
+ *
+ * Returns -ENODEV if BAR 0 is not a memory BAR, otherwise the result of
+ * pci_request_regions().
+ */
+static int request_bar(struct pci_dev *pdev)
+{
+       unsigned long bar0_flags = pci_resource_flags(pdev, 0);
+       int ret;
+
+       if (!(bar0_flags & IORESOURCE_MEM)) {
+               dev_err(&pdev->dev, "Missing registers BAR, aborting.\n");
+               return -ENODEV;
+       }
+
+       ret = pci_request_regions(pdev, DRIVER_NAME);
+       if (ret)
+               dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+
+       return ret;
+}
+
+/* Undo request_bar(): release the PCI regions reserved for @pdev. */
+static void release_bar(struct pci_dev *pdev)
+{
+       pci_release_regions(pdev);
+}
+
+/*
+ * Allocate the MSI-X entry table and enable MSI-X on the device.
+ *
+ * The number of vectors requested is num_ports * online CPUs plus
+ * MLX5_EQ_VEC_COMP_BASE, capped at the number of EQs the device supports.
+ * If fewer vectors are available the request is retried with the count
+ * reported by pci_enable_msix(), as long as that still leaves at least one
+ * completion vector (i.e. more than MLX5_EQ_VEC_COMP_BASE vectors).
+ *
+ * On success table->msix_arr and table->num_comp_vectors describe the
+ * enabled vectors and 0 is returned.  On any failure the entry table is
+ * freed, table->msix_arr is reset to NULL and a negative errno is
+ * returned, so the caller has nothing to undo.
+ */
+static int mlx5_enable_msix(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       int num_eqs = 1 << dev->caps.log_max_eq;
+       int nvec;
+       int err;
+       int i;
+
+       nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
+       nvec = min_t(int, nvec, num_eqs);
+       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+               return -ENOMEM;
+
+       table->msix_arr = kcalloc(nvec, sizeof(*table->msix_arr), GFP_KERNEL);
+       if (!table->msix_arr)
+               return -ENOMEM;
+
+       for (i = 0; i < nvec; i++)
+               table->msix_arr[i].entry = i;
+
+       for (;;) {
+               table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+               err = pci_enable_msix(dev->pdev, table->msix_arr, nvec);
+               if (!err)
+                       return 0;
+
+               /* Negative: hard failure, retrying cannot help. */
+               if (err < 0)
+                       break;
+
+               /*
+                * Positive: only 'err' vectors are available and MSI-X has
+                * NOT been enabled.  Give up if that would leave no
+                * completion vector, otherwise retry with the smaller count.
+                */
+               if (err <= MLX5_EQ_VEC_COMP_BASE) {
+                       mlx5_core_dbg(dev, "received %d MSI vectors out of %d requested\n", err, nvec);
+                       err = -ENOSPC;
+                       break;
+               }
+               nvec = err;
+       }
+
+       kfree(table->msix_arr);
+       table->msix_arr = NULL;
+
+       return err;
+}
+
+/* Undo mlx5_enable_msix(): disable MSI-X and free the entry table. */
+static void mlx5_disable_msix(struct mlx5_core_dev *dev)
+{
+       pci_disable_msix(dev->pdev);
+       kfree(dev->priv.eq_table.msix_arr);
+}
+
+/*
+ * Buffer layout used by set_hca_ctrl() for the MLX5_REG_HOST_ENDIANNESS
+ * register access: one value byte followed by 15 reserved bytes.
+ */
+struct mlx5_reg_host_endianess {
+       u8      he;
+       u8      rsvd[15];
+};
+
+/*
+ * Query the HCA capabilities (opmod 0x1), adjust them according to the
+ * device profile and write them back with SET_HCA_CAP.
+ *
+ * Adjustments applied to the queried capabilities:
+ *  - if the profile mask has MLX5_PROF_MASK_CMDIF_CSUM, the
+ *    MLX5_DEV_CAP_FLAG_CMDIF_CSUM flag is set or cleared to match
+ *    prof->cmdif_csum;
+ *  - if the profile mask has MLX5_PROF_MASK_QP_SIZE, log_max_qp is
+ *    taken from the profile;
+ *  - uar_page_sz is always set to PAGE_SHIFT - 12.
+ *
+ * When the set command succeeds and checksums were turned off by the
+ * profile, dev->cmd.checksum_disabled is set.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int handle_hca_cap(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd_query_hca_cap_mbox_out *cur_caps;
+       struct mlx5_cmd_set_hca_cap_mbox_in *new_caps;
+       struct mlx5_cmd_query_hca_cap_mbox_in query_in;
+       struct mlx5_cmd_set_hca_cap_mbox_out set_reply;
+       struct mlx5_profile *prof = dev->profile;
+       int use_csum = 1;
+       u64 cap_flags;
+       int err;
+
+       cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL);
+       if (!cur_caps)
+               return -ENOMEM;
+
+       new_caps = kzalloc(sizeof(*new_caps), GFP_KERNEL);
+       if (!new_caps) {
+               err = -ENOMEM;
+               goto free_bufs;
+       }
+
+       /* Step 1: read the current capabilities. */
+       memset(&query_in, 0, sizeof(query_in));
+       query_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+       query_in.hdr.opmod  = cpu_to_be16(0x1);
+       err = mlx5_cmd_exec(dev, &query_in, sizeof(query_in),
+                           cur_caps, sizeof(*cur_caps));
+       if (err)
+               goto free_bufs;
+
+       err = mlx5_cmd_status_to_err(&cur_caps->hdr);
+       if (err) {
+               mlx5_core_warn(dev, "query hca cap failed, %d\n", err);
+               goto free_bufs;
+       }
+
+       /* Step 2: start from what was queried and apply the profile. */
+       memcpy(&new_caps->hca_cap, &cur_caps->hca_cap,
+              sizeof(new_caps->hca_cap));
+
+       if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) {
+               use_csum = !!prof->cmdif_csum;
+               cap_flags = be64_to_cpu(new_caps->hca_cap.flags);
+               if (!use_csum)
+                       cap_flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+               else
+                       cap_flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+
+               new_caps->hca_cap.flags = cpu_to_be64(cap_flags);
+       }
+
+       if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+               new_caps->hca_cap.log_max_qp = prof->log_max_qp;
+
+       new_caps->hca_cap.uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
+
+       /* Step 3: write the adjusted capabilities back. */
+       memset(&set_reply, 0, sizeof(set_reply));
+       new_caps->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
+       err = mlx5_cmd_exec(dev, new_caps, sizeof(*new_caps),
+                           &set_reply, sizeof(set_reply));
+       if (err) {
+               mlx5_core_warn(dev, "set hca cap failed, %d\n", err);
+               goto free_bufs;
+       }
+
+       err = mlx5_cmd_status_to_err(&set_reply.hdr);
+       if (!err && !use_csum)
+               dev->cmd.checksum_disabled = 1;
+
+free_bufs:
+       kfree(cur_caps);
+       kfree(new_caps);
+
+       return err;
+}
+
+/*
+ * Write MLX5_SET_HOST_ENDIANNESS to the MLX5_REG_HOST_ENDIANNESS register
+ * through mlx5_core_access_reg() and return its result.
+ */
+static int set_hca_ctrl(struct mlx5_core_dev *dev)
+{
+       struct mlx5_reg_host_endianess reply;
+       struct mlx5_reg_host_endianess req;
+
+       memset(&req, 0, sizeof(req));
+       req.he = MLX5_SET_HOST_ENDIANNESS;
+
+       return mlx5_core_access_reg(dev, &req, sizeof(req),
+                                   &reply, sizeof(reply),
+                                   MLX5_REG_HOST_ENDIANNESS, 0, 1);
+}
+
+/*
+ * Bring up an mlx5 core device on top of @pdev.
+ *
+ * Performs, in order: debugfs directory creation, PCI enable / BAR
+ * reservation / bus mastering / DMA mask setup, mapping of the
+ * initialization segment, command interface and page allocator setup,
+ * host endianness and HCA capability programming, startup page
+ * allocation, INIT_HCA, health polling start, capability and adapter
+ * queries, MSI-X enabling, EQ setup, UAR allocation and finally
+ * initialization of the CQ, QP and SRQ tables.
+ *
+ * Returns 0 on success.  On failure every step already completed is
+ * undone in reverse order through the error labels at the end of the
+ * function and a negative errno is returned.  mlx5_dev_cleanup() is the
+ * counterpart for a successfully initialized device.
+ */
+int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       int err;
+
+       dev->pdev = pdev;
+       pci_set_drvdata(dev->pdev, dev);
+       /* strncpy() does not terminate on truncation, hence the explicit NUL. */
+       strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
+       priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
+
+       mutex_init(&priv->pgdir_mutex);
+       INIT_LIST_HEAD(&priv->pgdir_list);
+       spin_lock_init(&priv->mkey_lock);
+
+       priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
+       if (!priv->dbg_root)
+               return -ENOMEM;
+
+       /* PCI level setup. */
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n");
+               goto err_dbg;
+       }
+
+       err = request_bar(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "error requesting BARs, aborting.\n");
+               goto err_disable;
+       }
+
+       pci_set_master(pdev);
+
+       err = set_dma_caps(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+               goto err_clr_master;
+       }
+
+       /* Map the initialization segment located at the start of BAR 0. */
+       dev->iseg_base = pci_resource_start(dev->pdev, 0);
+       dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+       if (!dev->iseg) {
+               err = -ENOMEM;
+               dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+               goto err_clr_master;
+       }
+       dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+                fw_rev_min(dev), fw_rev_sub(dev));
+
+       /* Command interface and page allocator. */
+       err = mlx5_cmd_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
+               goto err_unmap;
+       }
+
+       mlx5_pagealloc_init(dev);
+       err = set_hca_ctrl(dev);
+       if (err) {
+               dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+               goto err_pagealloc_cleanup;
+       }
+
+       err = handle_hca_cap(dev);
+       if (err) {
+               dev_err(&pdev->dev, "handle_hca_cap failed\n");
+               goto err_pagealloc_cleanup;
+       }
+
+       err = mlx5_satisfy_startup_pages(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to allocate startup pages\n");
+               goto err_pagealloc_cleanup;
+       }
+
+       err = mlx5_pagealloc_start(dev);
+       if (err) {
+               dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
+               goto err_reclaim_pages;
+       }
+
+       err = mlx5_cmd_init_hca(dev);
+       if (err) {
+               dev_err(&pdev->dev, "init hca failed\n");
+               goto err_pagealloc_stop;
+       }
+
+       /* From here on the error path must also stop polling and tear down the HCA. */
+       mlx5_start_health_poll(dev);
+
+       err = mlx5_cmd_query_hca_cap(dev, &dev->caps);
+       if (err) {
+               dev_err(&pdev->dev, "query hca failed\n");
+               goto err_stop_poll;
+       }
+
+       err = mlx5_cmd_query_adapter(dev);
+       if (err) {
+               dev_err(&pdev->dev, "query adapter failed\n");
+               goto err_stop_poll;
+       }
+
+       /* Interrupts, event queues and UARs. */
+       err = mlx5_enable_msix(dev);
+       if (err) {
+               dev_err(&pdev->dev, "enable msix failed\n");
+               goto err_stop_poll;
+       }
+
+       err = mlx5_eq_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to initialize eq\n");
+               goto disable_msix;
+       }
+
+       err = mlx5_alloc_uuars(dev, &priv->uuari);
+       if (err) {
+               dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+               goto err_eq_cleanup;
+       }
+
+       err = mlx5_start_eqs(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
+               goto err_free_uar;
+       }
+
+       MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
+
+       mlx5_init_cq_table(dev);
+       mlx5_init_qp_table(dev);
+       mlx5_init_srq_table(dev);
+
+       return 0;
+
+       /* Error unwinding: each label undoes one step and falls through to the next. */
+err_free_uar:
+       mlx5_free_uuars(dev, &priv->uuari);
+
+err_eq_cleanup:
+       mlx5_eq_cleanup(dev);
+
+disable_msix:
+       mlx5_disable_msix(dev);
+
+err_stop_poll:
+       mlx5_stop_health_poll(dev);
+       mlx5_cmd_teardown_hca(dev);
+
+err_pagealloc_stop:
+       mlx5_pagealloc_stop(dev);
+
+err_reclaim_pages:
+       mlx5_reclaim_startup_pages(dev);
+
+err_pagealloc_cleanup:
+       mlx5_pagealloc_cleanup(dev);
+       mlx5_cmd_cleanup(dev);
+
+err_unmap:
+       iounmap(dev->iseg);
+
+err_clr_master:
+       pci_clear_master(dev->pdev);
+       release_bar(dev->pdev);
+
+err_disable:
+       pci_disable_device(dev->pdev);
+
+err_dbg:
+       debugfs_remove(priv->dbg_root);
+       return err;
+}
+EXPORT_SYMBOL(mlx5_dev_init);
+
+/*
+ * Tear down a device that was successfully set up by mlx5_dev_init().
+ * The calls mirror the error unwinding of mlx5_dev_init(), preceded by
+ * the cleanup of the SRQ/QP/CQ tables and the stopping of the EQs.
+ */
+void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
+{
+       /* Resource tables and event queues. */
+       mlx5_cleanup_srq_table(dev);
+       mlx5_cleanup_qp_table(dev);
+       mlx5_cleanup_cq_table(dev);
+       mlx5_stop_eqs(dev);
+       mlx5_free_uuars(dev, &dev->priv.uuari);
+       mlx5_eq_cleanup(dev);
+       mlx5_disable_msix(dev);
+
+       /* Health polling and the HCA itself. */
+       mlx5_stop_health_poll(dev);
+       mlx5_cmd_teardown_hca(dev);
+
+       /* Page allocator and command interface. */
+       mlx5_pagealloc_stop(dev);
+       mlx5_reclaim_startup_pages(dev);
+       mlx5_pagealloc_cleanup(dev);
+       mlx5_cmd_cleanup(dev);
+
+       /* PCI level resources and debugfs. */
+       iounmap(dev->iseg);
+       pci_clear_master(dev->pdev);
+       release_bar(dev->pdev);
+       pci_disable_device(dev->pdev);
+       debugfs_remove(dev->priv.dbg_root);
+}
+EXPORT_SYMBOL(mlx5_dev_cleanup);
+
+/*
+ * Module entry point: register the debugfs root, create the driver's
+ * single-threaded workqueue and initialize the health code.
+ *
+ * Returns 0 on success, or -ENOMEM (after unregistering debugfs) if the
+ * workqueue cannot be created.
+ */
+static int __init init(void)
+{
+       mlx5_register_debugfs();
+
+       mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq");
+       if (!mlx5_core_wq) {
+               /* Nothing but debugfs has been set up at this point. */
+               mlx5_unregister_debugfs();
+               return -ENOMEM;
+       }
+
+       mlx5_health_init();
+
+       return 0;
+}
+
+/* Module exit point: undo init() in reverse order. */
+static void __exit cleanup(void)
+{
+       mlx5_health_cleanup();
+       destroy_workqueue(mlx5_core_wq);
+       mlx5_unregister_debugfs();
+}
+
+/* Hook init()/cleanup() up as the module's load and unload handlers. */
+module_init(init);
+module_exit(cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644 (file)
index 0000000..4483764
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+/* Input mailbox filled by mlx5_core_attach_mcg(): big-endian QP number and the 16-byte GID. */
+struct mlx5_attach_mcg_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       __be32                  rsvd;
+       u8                      gid[16];
+};
+
+/* Output mailbox of mlx5_core_attach_mcg(); only hdr.status is examined. */
+struct mlx5_attach_mcg_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvf[8];
+};
+
+/* Input mailbox filled by mlx5_core_detach_mcg(): big-endian QP number and the 16-byte GID. */
+struct mlx5_detach_mcg_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       __be32                  rsvd;
+       u8                      gid[16];
+};
+
+/* Output mailbox of mlx5_core_detach_mcg(); only hdr.status is examined. */
+struct mlx5_detach_mcg_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvf[8];
+};
+
+/*
+ * Execute MLX5_CMD_OP_ATTACH_TO_MCG for QP number @qpn and the GID
+ * pointed to by @mgid.
+ *
+ * Returns the error from mlx5_cmd_exec() if the command could not be
+ * executed; otherwise, when the output header carries a non-zero status,
+ * returns that status translated by mlx5_cmd_status_to_err(); 0 otherwise.
+ */
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+       struct mlx5_attach_mcg_mbox_out reply;
+       struct mlx5_attach_mcg_mbox_in req;
+       int ret;
+
+       memset(&req, 0, sizeof(req));
+       memset(&reply, 0, sizeof(reply));
+
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
+       req.qpn = cpu_to_be32(qpn);
+       memcpy(req.gid, mgid, sizeof(*mgid));
+
+       ret = mlx5_cmd_exec(dev, &req, sizeof(req), &reply, sizeof(reply));
+       if (!ret && reply.hdr.status)
+               ret = mlx5_cmd_status_to_err(&reply.hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_core_attach_mcg);
+
+/*
+ * Execute MLX5_CMD_OP_DETACH_FROM_MCG for QP number @qpn and the GID
+ * pointed to by @mgid.
+ *
+ * Returns the error from mlx5_cmd_exec() if the command could not be
+ * executed; otherwise, when the output header carries a non-zero status,
+ * returns that status translated by mlx5_cmd_status_to_err(); 0 otherwise.
+ */
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+       struct mlx5_detach_mcg_mbox_out reply;
+       struct mlx5_detach_mcg_mbox_in req;
+       int ret;
+
+       memset(&req, 0, sizeof(req));
+       memset(&reply, 0, sizeof(reply));
+
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG);
+       req.qpn = cpu_to_be32(qpn);
+       memcpy(req.gid, mgid, sizeof(*mgid));
+
+       ret = mlx5_cmd_exec(dev, &req, sizeof(req), &reply, sizeof(reply));
+       if (!ret && reply.hdr.status)
+               ret = mlx5_cmd_status_to_err(&reply.hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644 (file)
index 0000000..68b74e1
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_CORE_H__
+#define __MLX5_CORE_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+/* Debug mask tested by mlx5_core_dbg_mask() below. */
+extern int mlx5_core_debug_mask;
+
+/*
+ * Logging helpers.  Each prefixes the message with the device name
+ * ((dev)->priv.name), the calling function, the line number and the pid
+ * of the current task.
+ */
+
+/* Debug-level message via pr_debug(). */
+#define mlx5_core_dbg(dev, format, arg...)                                    \
+pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,   \
+        current->pid, ##arg)
+
+/* Like mlx5_core_dbg(), but only when (mask) shares a bit with mlx5_core_debug_mask. */
+#define mlx5_core_dbg_mask(dev, mask, format, arg...)                         \
+do {                                                                          \
+       if ((mask) & mlx5_core_debug_mask)                                     \
+               pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name,       \
+                        __func__, __LINE__, current->pid, ##arg);             \
+} while (0)
+
+/* Error-level message via pr_err(). */
+#define mlx5_core_err(dev, format, arg...) \
+pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,     \
+       current->pid, ##arg)
+
+/* Warning-level message via pr_warn(). */
+#define mlx5_core_warn(dev, format, arg...) \
+pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,    \
+       current->pid, ##arg)
+
+/*
+ * Command debug categories.
+ * NOTE(review): presumably bit positions within mlx5_core_debug_mask
+ * (values 0 and 1 would match "1 = dump cmd data, 2 = dump cmd exec time"
+ * in the debug_mask parameter description) - confirm against the users.
+ */
+enum {
+       MLX5_CMD_DATA, /* print command payload only */
+       MLX5_CMD_TIME, /* print command execution time */
+};
+
+
+/* Core-internal command helpers called from mlx5_dev_init() and mlx5_dev_cleanup(). */
+int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
+                          struct mlx5_caps *caps);
+int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644 (file)
index 0000000..5b44e2e
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/*
+ * mlx5_core_create_mkey - issue CREATE_MKEY and record the resulting key
+ * @dev:   device to execute the command on
+ * @mr:    on success mr->key receives the complete memory key
+ * @in:    caller-built CREATE_MKEY inbox; the opcode and an 8-bit key value
+ *         are filled in here
+ * @inlen: size of @in in bytes
+ *
+ * The 8-bit key value is taken from the per-device counter
+ * dev->priv.mkey_key, which is advanced under dev->priv.mkey_lock.  It is
+ * OR-ed into the request and into the key built from the index returned by
+ * firmware.
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error if the command could not
+ * be executed, or the translated firmware status.
+ */
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                         struct mlx5_create_mkey_mbox_in *in, int inlen)
+{
+       struct mlx5_create_mkey_mbox_out out;
+       int err;
+       u8 key;
+
+       memset(&out, 0, sizeof(out));
+       spin_lock(&dev->priv.mkey_lock);
+       key = dev->priv.mkey_key++;
+       spin_unlock(&dev->priv.mkey_lock);
+       in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       if (err) {
+               mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
+               return err;
+       }
+
+       if (out.hdr.status) {
+               mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
+               return mlx5_cmd_status_to_err(&out.hdr);
+       }
+
+       /* Only the low 24 bits of the returned word are used as the index. */
+       mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
+       mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+                     be32_to_cpu(out.mkey), key, mr->key);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey);
+
+/*
+ * Destroy the memory key held in @mr.
+ *
+ * Sends DESTROY_MKEY for the index derived from mr->key.  Returns 0 on
+ * success, the mlx5_cmd_exec() error if the command could not be executed,
+ * or the translated firmware status otherwise.
+ */
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
+{
+       struct mlx5_destroy_mkey_mbox_out outbox;
+       struct mlx5_destroy_mkey_mbox_in inbox;
+       int ret;
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+
+       inbox.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (!ret && outbox.hdr.status)
+               ret = mlx5_cmd_status_to_err(&outbox.hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_mkey);
+
+/*
+ * Query the memory key held in @mr.
+ *
+ * @out is zeroed over @outlen bytes and then filled by QUERY_MKEY.  The
+ * request is built in a DESTROY_MKEY inbox structure, of which only the
+ * header and the mkey field are set.
+ * NOTE(review): confirm the destroy inbox layout is the intended one for
+ * QUERY_MKEY.
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the translated
+ * firmware status.
+ */
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                        struct mlx5_query_mkey_mbox_out *out, int outlen)
+{
+       struct mlx5_destroy_mkey_mbox_in inbox;
+       int ret;
+
+       memset(out, 0, outlen);
+       memset(&inbox, 0, sizeof(inbox));
+
+       inbox.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), out, outlen);
+       if (!ret && out->hdr.status)
+               ret = mlx5_cmd_status_to_err(&out->hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_core_query_mkey);
+
+/*
+ * Fetch the dump/fill mkey reported by QUERY_SPECIAL_CONTEXTS.
+ *
+ * @mr is not used by this function.  *@mkey is written only when the
+ * command executes and firmware reports a zero status.
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the translated
+ * firmware status.
+ */
+int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                            u32 *mkey)
+{
+       struct mlx5_query_special_ctxs_mbox_out outbox;
+       struct mlx5_query_special_ctxs_mbox_in inbox;
+       int ret;
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (ret)
+               return ret;
+
+       if (outbox.hdr.status)
+               return mlx5_cmd_status_to_err(&outbox.hdr);
+
+       *mkey = be32_to_cpu(outbox.dump_fill_mkey);
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644 (file)
index 0000000..f0bf463
--- /dev/null
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Values written to hdr.opmod of a MANAGE_PAGES command in this file. */
+enum {
+       /* sent by give_pages() when it fails and the caller asked for notification */
+       MLX5_PAGES_CANT_GIVE    = 0,
+       /* sent by give_pages() together with the list of page addresses */
+       MLX5_PAGES_GIVE         = 1,
+       /* sent by reclaim_pages() to get page addresses back */
+       MLX5_PAGES_TAKE         = 2
+};
+
+/*
+ * A page request deferred to the page-allocator workqueue.  Allocated by
+ * mlx5_core_req_pages_handler() and freed by pages_work_handler().
+ */
+struct mlx5_pages_req {
+       struct mlx5_core_dev *dev;
+       u32     func_id;
+       /* > 0: give that many pages; < 0: reclaim -npages pages */
+       s16     npages;
+       struct work_struct work;
+};
+
+/*
+ * Tracking entry for one page handed to firmware.  Entries live in the
+ * rb-tree rooted at dev->priv.page_root and are keyed by @addr.
+ */
+struct fw_page {
+       struct rb_node  rb_node;
+       u64             addr;           /* DMA address returned by dma_map_page() */
+       struct page     *page;
+       u16             func_id;
+};
+
+/* Input mailbox of the QUERY_PAGES command (see mlx5_cmd_query_pages()). */
+struct mlx5_query_pages_inbox {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+/* Output mailbox of the QUERY_PAGES command; multi-byte fields are big endian. */
+struct mlx5_query_pages_outbox {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      reserved[2];
+       __be16                  func_id;
+       __be16                  init_pages;
+       __be16                  num_pages;
+};
+
+/*
+ * Input mailbox of the MANAGE_PAGES command.  give_pages() appends
+ * num_entries page addresses in pas[]; reclaim_pages() sends the fixed
+ * part only.
+ */
+struct mlx5_manage_pages_inbox {
+       struct mlx5_inbox_hdr   hdr;
+       __be16                  rsvd0;
+       __be16                  func_id;
+       __be16                  rsvd1;
+       __be16                  num_entries;
+       u8                      rsvd2[16];
+       __be64                  pas[0];
+};
+
+/*
+ * Output mailbox of the MANAGE_PAGES command.  reclaim_pages() reads
+ * num_entries returned page addresses from pas[].
+ */
+struct mlx5_manage_pages_outbox {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[2];
+       __be16                  num_entries;
+       u8                      rsvd1[20];
+       __be64                  pas[0];
+};
+
+/*
+ * Record a page that is about to be handed to firmware.
+ *
+ * A new fw_page keyed by @addr is linked into dev->priv.page_root.  While
+ * descending, a node whose address is lower than @addr sends the search to
+ * its left child; remove_page() follows the same convention.
+ *
+ * Returns 0 on success, -EEXIST if @addr is already tracked, or -ENOMEM if
+ * the tracking entry cannot be allocated.
+ */
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+{
+       struct rb_root *root = &dev->priv.page_root;
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct fw_page *entry;
+
+       /* Walk down to the empty slot where @addr belongs. */
+       while (*link) {
+               struct fw_page *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct fw_page, rb_node);
+               if (cur->addr == addr)
+                       return -EEXIST;
+
+               link = cur->addr < addr ? &parent->rb_left : &parent->rb_right;
+       }
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->func_id = func_id;
+       entry->page = page;
+       entry->addr = addr;
+
+       rb_link_node(&entry->rb_node, parent, link);
+       rb_insert_color(&entry->rb_node, root);
+
+       return 0;
+}
+
+/*
+ * Stop tracking the page whose DMA address is @addr.
+ *
+ * The matching fw_page is erased from dev->priv.page_root and freed.
+ * Returns the struct page that was recorded for @addr, or NULL when the
+ * address is not in the tree.
+ */
+static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+{
+       struct rb_root *root = &dev->priv.page_root;
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               struct fw_page *cur = rb_entry(node, struct fw_page, rb_node);
+               struct page *page;
+
+               if (cur->addr == addr) {
+                       rb_erase(&cur->rb_node, root);
+                       page = cur->page;
+                       kfree(cur);
+                       return page;
+               }
+
+               /* same direction rule as insert_page() */
+               node = cur->addr < addr ? node->rb_left : node->rb_right;
+       }
+
+       return NULL;
+}
+
+/*
+ * Execute QUERY_PAGES.
+ *
+ * @func_id:    always written on success
+ * @pages:      optional; receives num_pages when not NULL
+ * @init_pages: optional; receives init_pages when not NULL
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the translated
+ * firmware status.  No output is written on failure.
+ */
+static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+                               s16 *pages, s16 *init_pages)
+{
+       struct mlx5_query_pages_outbox  outbox;
+       struct mlx5_query_pages_inbox   inbox;
+       int ret;
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (ret)
+               return ret;
+
+       if (outbox.hdr.status)
+               return mlx5_cmd_status_to_err(&outbox.hdr);
+
+       *func_id = be16_to_cpu(outbox.func_id);
+       if (init_pages)
+               *init_pages = be16_to_cpu(outbox.init_pages);
+       if (pages)
+               *pages = be16_to_cpu(outbox.num_pages);
+
+       return 0;
+}
+
+/*
+ * give_pages - allocate, map and hand @npages pages to firmware
+ * @dev:         device
+ * @func_id:     function id placed in the MANAGE_PAGES request and recorded
+ *               with every tracked page
+ * @npages:      number of pages to give
+ * @notify_fail: when non-zero, a MANAGE_PAGES/CANT_GIVE command is sent to
+ *               firmware if the pages cannot be supplied
+ *
+ * Each page is DMA mapped and tracked with insert_page() before the
+ * command is issued.  On any failure after the inbox was allocated, every
+ * page obtained so far is untracked, unmapped and freed.
+ *
+ * The failure notification is built in a separate on-stack inbox: the
+ * cleanup loop still needs the addresses stored in in->pas[], so @in must
+ * not be cleared before that loop has run.
+ *
+ * dev->priv.fw_pages is increased only after firmware accepted the pages.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+                     int notify_fail)
+{
+       struct mlx5_manage_pages_inbox *in;
+       struct mlx5_manage_pages_inbox nin;
+       struct mlx5_manage_pages_outbox out;
+       struct page *page;
+       int inlen;
+       u64 addr;
+       int err;
+       int i;
+
+       inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
+       in = mlx5_vzalloc(inlen);
+       if (!in) {
+               mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
+               return -ENOMEM;
+       }
+       memset(&out, 0, sizeof(out));
+
+       for (i = 0; i < npages; i++) {
+               page = alloc_page(GFP_HIGHUSER);
+               if (!page) {
+                       err = -ENOMEM;
+                       mlx5_core_warn(dev, "failed to allocate page\n");
+                       goto out_alloc;
+               }
+               addr = dma_map_page(&dev->pdev->dev, page, 0,
+                                   PAGE_SIZE, DMA_BIDIRECTIONAL);
+               if (dma_mapping_error(&dev->pdev->dev, addr)) {
+                       mlx5_core_warn(dev, "failed dma mapping page\n");
+                       __free_page(page);
+                       err = -ENOMEM;
+                       goto out_alloc;
+               }
+               err = insert_page(dev, addr, page, func_id);
+               if (err) {
+                       mlx5_core_err(dev, "failed to track allocated page\n");
+                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                       __free_page(page);
+                       err = -ENOMEM;
+                       goto out_alloc;
+               }
+               in->pas[i] = cpu_to_be64(addr);
+       }
+
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+       in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
+       in->func_id = cpu_to_be16(func_id);
+       in->num_entries = cpu_to_be16(npages);
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       mlx5_core_dbg(dev, "err %d\n", err);
+       if (err) {
+               mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
+               goto out_alloc;
+       }
+
+       if (out.hdr.status) {
+               err = mlx5_cmd_status_to_err(&out.hdr);
+               if (err) {
+                       mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status);
+                       goto out_alloc;
+               }
+       }
+
+       /* Count the pages only once firmware has really taken them. */
+       dev->priv.fw_pages += npages;
+
+       mlx5_core_dbg(dev, "err %d\n", err);
+
+       goto out_free;
+
+out_alloc:
+       if (notify_fail) {
+               /* Use a private inbox; in->pas[] is still needed below. */
+               memset(&nin, 0, sizeof(nin));
+               memset(&out, 0, sizeof(out));
+               nin.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+               nin.hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+               if (mlx5_cmd_exec(dev, &nin, sizeof(nin), &out, sizeof(out)))
+                       mlx5_core_warn(dev, "page notify failed\n");
+       }
+       /* Undo pages [0, i): entry i itself was already released above. */
+       for (i--; i >= 0; i--) {
+               addr = be64_to_cpu(in->pas[i]);
+               page = remove_page(dev, addr);
+               if (!page) {
+                       mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
+                                     addr);
+                       continue;
+               }
+               dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+               __free_page(page);
+       }
+
+out_free:
+       mlx5_vfree(in);
+       return err;
+}
+
+/*
+ * reclaim_pages - ask firmware to return up to @npages pages
+ * @dev:      device
+ * @func_id:  function id placed in the MANAGE_PAGES request
+ * @npages:   maximum number of pages requested; also sizes the output
+ *            mailbox
+ * @nclaimed: optional; receives the number of pages firmware returned
+ *
+ * Every returned address is looked up with remove_page(); known pages are
+ * unmapped and freed, unknown addresses are only reported.
+ *
+ * dev->priv.fw_pages is reduced by the number of pages actually returned,
+ * and only after the firmware status was checked.  A returned count larger
+ * than @npages is rejected because the output buffer holds only @npages
+ * addresses.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+                        int *nclaimed)
+{
+       struct mlx5_manage_pages_inbox   in;
+       struct mlx5_manage_pages_outbox *out;
+       struct page *page;
+       int num_claimed;
+       int outlen;
+       u64 addr;
+       int err;
+       int i;
+
+       memset(&in, 0, sizeof(in));
+       outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
+       out = mlx5_vzalloc(outlen);
+       if (!out)
+               return -ENOMEM;
+
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+       in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
+       in.func_id = cpu_to_be16(func_id);
+       in.num_entries = cpu_to_be16(npages);
+       mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+       if (err) {
+               mlx5_core_err(dev, "failed reclaiming pages\n");
+               goto out_free;
+       }
+
+       if (out->hdr.status) {
+               err = mlx5_cmd_status_to_err(&out->hdr);
+               goto out_free;
+       }
+
+       num_claimed = be16_to_cpu(out->num_entries);
+       if (num_claimed > npages) {
+               /* out->pas[] has room for npages entries only */
+               mlx5_core_warn(dev, "fw returned %d pages, driver asked for %d\n",
+                              num_claimed, npages);
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       dev->priv.fw_pages -= num_claimed;
+       if (nclaimed)
+               *nclaimed = num_claimed;
+
+       for (i = 0; i < num_claimed; i++) {
+               addr = be64_to_cpu(out->pas[i]);
+               page = remove_page(dev, addr);
+               if (!page) {
+                       mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
+               } else {
+                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                       __free_page(page);
+               }
+       }
+
+out_free:
+       mlx5_vfree(out);
+       return err;
+}
+
+/*
+ * Workqueue callback for a queued mlx5_pages_req.
+ *
+ * A negative npages reclaims that many pages, a positive npages gives
+ * pages (with failure notification enabled), zero does nothing.  The
+ * request is freed in all cases.
+ */
+static void pages_work_handler(struct work_struct *work)
+{
+       struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
+       struct mlx5_core_dev *dev = req->dev;
+       const char *what = "give";
+       int ret = 0;
+
+       if (req->npages < 0) {
+               what = "reclaim";
+               ret = reclaim_pages(dev, req->func_id, -req->npages, NULL);
+       } else if (req->npages > 0) {
+               ret = give_pages(dev, req->func_id, req->npages, 1);
+       }
+
+       if (ret)
+               mlx5_core_warn(dev, "%s fail %d\n", what, ret);
+
+       kfree(req);
+}
+
+/*
+ * Queue a page request for deferred handling.
+ *
+ * A mlx5_pages_req describing (@func_id, @npages) is allocated with
+ * GFP_ATOMIC and queued on dev->priv.pg_wq, where pages_work_handler()
+ * processes and frees it.  If the allocation fails the request is dropped
+ * with a warning.
+ */
+void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
+                                s16 npages)
+{
+       struct mlx5_pages_req *req = kzalloc(sizeof(*req), GFP_ATOMIC);
+
+       if (!req) {
+               mlx5_core_warn(dev, "failed to allocate pages request\n");
+               return;
+       }
+
+       req->npages = npages;
+       req->func_id = func_id;
+       req->dev = dev;
+
+       INIT_WORK(&req->work, pages_work_handler);
+       queue_work(dev->priv.pg_wq, &req->work);
+}
+
+/*
+ * Give firmware the number of pages it reports as init_pages.
+ *
+ * The count and function id come from QUERY_PAGES; the pages are given
+ * without failure notification.  Returns 0 on success or a negative errno
+ * from the query or from give_pages().
+ */
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev)
+{
+       u16 uninitialized_var(func_id);
+       s16 uninitialized_var(init_pages);
+       int ret;
+
+       ret = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages);
+       if (!ret) {
+               mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id);
+               ret = give_pages(dev, func_id, init_pages, 0);
+       }
+
+       return ret;
+}
+
+/*
+ * Number of pages requested per reclaim command during teardown.
+ *
+ * Computed from the size of the command layout's 'in' field plus one
+ * protection block's data, minus the fixed MANAGE_PAGES outbox, divided by
+ * 8 (presumably the size of one __be64 pas[] entry).
+ */
+static int optimal_reclaimed_pages(void)
+{
+       /* used only as operands of sizeof */
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_layout *lay;
+
+       return (sizeof(lay->in) + sizeof(block->data) -
+               sizeof(struct mlx5_manage_pages_outbox)) / 8;
+}
+
+/*
+ * Reclaim all pages still tracked in dev->priv.page_root.
+ *
+ * Reclaim commands are issued, using the func_id of the first tracked
+ * page, until the tree is empty or 5000 ms have elapsed.  Running out of
+ * time is only reported with a warning and still returns 0; a failing
+ * reclaim command aborts and returns its error.
+ */
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+       unsigned long deadline = jiffies + msecs_to_jiffies(5000);
+       struct rb_node *first;
+       int ret;
+
+       for (;;) {
+               first = rb_first(&dev->priv.page_root);
+               if (first) {
+                       struct fw_page *fwp = rb_entry(first, struct fw_page, rb_node);
+
+                       ret = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL);
+                       if (ret) {
+                               mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", ret);
+                               return ret;
+                       }
+               }
+
+               /* The deadline is checked even when the tree was empty. */
+               if (time_after(jiffies, deadline)) {
+                       mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
+                       break;
+               }
+
+               if (!first)
+                       break;
+       }
+
+       return 0;
+}
+
+/* Start with an empty tree of firmware-owned pages. */
+void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+{
+       dev->priv.page_root = RB_ROOT;
+}
+
+/* Counterpart of mlx5_pagealloc_init(); currently there is nothing to undo. */
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+{
+       /* nothing */
+}
+
+/*
+ * Create the single-threaded workqueue that runs deferred page requests.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
+ */
+int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+       dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+
+       return dev->priv.pg_wq ? 0 : -ENOMEM;
+}
+
+/* Destroy the workqueue created by mlx5_pagealloc_start(). */
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+       destroy_workqueue(dev->priv.pg_wq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644 (file)
index 0000000..790da5c
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Input mailbox of ALLOC_PD. */
+struct mlx5_alloc_pd_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+/* Output mailbox of ALLOC_PD; mlx5_core_alloc_pd() uses the low 24 bits of pdn. */
+struct mlx5_alloc_pd_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  pdn;
+       u8                      rsvd[4];
+};
+
+/* Input mailbox of DEALLOC_PD. */
+struct mlx5_dealloc_pd_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  pdn;
+       u8                      rsvd[4];
+};
+
+/* Output mailbox of DEALLOC_PD. */
+struct mlx5_dealloc_pd_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+/*
+ * Allocate a protection domain.
+ *
+ * On success *@pdn receives the low 24 bits of the value returned by
+ * ALLOC_PD; it is left untouched otherwise.  Returns 0 on success, the
+ * mlx5_cmd_exec() error, or the translated firmware status.
+ */
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+{
+       struct mlx5_alloc_pd_mbox_out   outbox;
+       struct mlx5_alloc_pd_mbox_in    inbox;
+       int ret;
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (ret)
+               return ret;
+
+       if (outbox.hdr.status)
+               return mlx5_cmd_status_to_err(&outbox.hdr);
+
+       *pdn = be32_to_cpu(outbox.pdn) & 0xffffff;
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_pd);
+
+/*
+ * Release the protection domain @pdn.
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the translated
+ * firmware status.
+ */
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+{
+       struct mlx5_dealloc_pd_mbox_out outbox;
+       struct mlx5_dealloc_pd_mbox_in  inbox;
+       int ret;
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+       inbox.pdn = cpu_to_be32(pdn);
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (!ret && outbox.hdr.status)
+               ret = mlx5_cmd_status_to_err(&outbox.hdr);
+
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644 (file)
index 0000000..f6afe7b
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/*
+ * mlx5_core_access_reg - read or write a device register via ACCESS_REG
+ * @dev:      device
+ * @data_in:  register payload copied into the command inbox
+ * @size_in:  size of @data_in in bytes
+ * @data_out: buffer that receives the register payload on success
+ * @size_out: size of @data_out in bytes
+ * @reg_num:  register id
+ * @arg:      value placed in the 'arg' field of the inbox
+ * @write:    non-zero for a write; the command opmod is set to !@write
+ *
+ * Both mailboxes are allocated as header plus payload.  The output length
+ * given to mlx5_cmd_exec() must be the size of the outbox structure, not
+ * of the pointer to it, plus @size_out.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, the mlx5_cmd_exec()
+ * error, or the translated firmware status.  @data_out is written only on
+ * success.
+ */
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+                        int size_in, void *data_out, int size_out,
+                        u16 reg_num, int arg, int write)
+{
+       struct mlx5_access_reg_mbox_in *in = NULL;
+       struct mlx5_access_reg_mbox_out *out = NULL;
+       int err = -ENOMEM;
+
+       in = mlx5_vzalloc(sizeof(*in) + size_in);
+       if (!in)
+               return -ENOMEM;
+
+       out = mlx5_vzalloc(sizeof(*out) + size_out);
+       if (!out)
+               goto ex1;
+
+       memcpy(in->data, data_in, size_in);
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
+       in->hdr.opmod = cpu_to_be16(!write);
+       in->arg = cpu_to_be32(arg);
+       in->register_id = cpu_to_be16(reg_num);
+       err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
+                           sizeof(*out) + size_out);
+       if (err)
+               goto ex2;
+
+       if (out->hdr.status)
+               err = mlx5_cmd_status_to_err(&out->hdr);
+
+       if (!err)
+               memcpy(data_out, out->data, size_out);
+
+ex2:
+       mlx5_vfree(out);
+ex1:
+       mlx5_vfree(in);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
+
+
+/*
+ * Register payload used with MLX5_REG_PCAP.  mlx5_set_port_caps() sets
+ * only port_num and caps_127_96; multi-byte fields are big endian.
+ */
+struct mlx5_reg_pcap {
+       u8                      rsvd0;
+       u8                      port_num;
+       u8                      rsvd1[2];
+       __be32                  caps_127_96;
+       __be32                  caps_95_64;
+       __be32                  caps_63_32;
+       __be32                  caps_31_0;
+};
+
+/*
+ * Write @caps into the caps_127_96 field of the PCAP register of port
+ * @port_num.  The register contents returned by the command are received
+ * into a local buffer and discarded.
+ *
+ * Returns the result of mlx5_core_access_reg().
+ */
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps)
+{
+       struct mlx5_reg_pcap reply;
+       struct mlx5_reg_pcap req;
+
+       memset(&req, 0, sizeof(req));
+       req.port_num = port_num;
+       req.caps_127_96 = cpu_to_be32(caps);
+
+       return mlx5_core_access_reg(dev, &req, sizeof(req), &reply,
+                                   sizeof(reply), MLX5_REG_PCAP, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
new file mode 100644 (file)
index 0000000..54faf8b
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+/*
+ * Deliver an asynchronous event to the QP numbered @qpn.
+ *
+ * The QP is looked up under the table lock and a reference is taken before
+ * the lock is dropped, so the object stays valid while qp->event() runs.
+ * Dropping the last reference completes qp->free, which
+ * mlx5_core_destroy_qp() waits on.  An unknown @qpn is only reported.
+ */
+void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type)
+{
+       struct mlx5_qp_table *qpt = &dev->priv.qp_table;
+       struct mlx5_core_qp *qp;
+
+       spin_lock(&qpt->lock);
+       qp = radix_tree_lookup(&qpt->tree, qpn);
+       if (qp)
+               atomic_inc(&qp->refcount);
+       spin_unlock(&qpt->lock);
+
+       if (qp) {
+               qp->event(qp, event_type);
+               if (atomic_dec_and_test(&qp->refcount))
+                       complete(&qp->free);
+               return;
+       }
+
+       mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn);
+}
+
+/*
+ * mlx5_core_create_qp - create a QP in firmware and register it
+ * @dev:   device
+ * @qp:    core QP object; qpn, pid, refcount and the 'free' completion are
+ *         initialised here
+ * @in:    caller-built CREATE_QP inbox; the opcode is filled in here
+ * @inlen: size of @in in bytes
+ *
+ * After firmware created the QP it is inserted into the device QP table so
+ * that mlx5_qp_event() can find it.  The reference count and completion
+ * are set up before the insertion, so the object is fully initialised by
+ * the time it becomes visible.  If the insertion fails the QP is destroyed
+ * in firmware again; the result of that rollback command is ignored.
+ *
+ * Failure to add the QP to the debug file system is only logged.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+                       struct mlx5_core_qp *qp,
+                       struct mlx5_create_qp_mbox_in *in,
+                       int inlen)
+{
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+       struct mlx5_create_qp_mbox_out out;
+       struct mlx5_destroy_qp_mbox_in din;
+       struct mlx5_destroy_qp_mbox_out dout;
+       int err;
+
+       memset(&out, 0, sizeof(out));
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+
+       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       if (err) {
+               mlx5_core_warn(dev, "ret %d\n", err);
+               return err;
+       }
+
+       if (out.hdr.status) {
+               pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
+               return mlx5_cmd_status_to_err(&out.hdr);
+       }
+
+       qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
+       mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
+
+       /* Finish initialising @qp before the event path can look it up. */
+       qp->pid = current->pid;
+       atomic_set(&qp->refcount, 1);
+       init_completion(&qp->free);
+
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree, qp->qpn, qp);
+       spin_unlock_irq(&table->lock);
+       if (err) {
+               mlx5_core_warn(dev, "err %d\n", err);
+               goto err_cmd;
+       }
+
+       err = mlx5_debug_qp_add(dev, qp);
+       if (err)
+               mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
+                             qp->qpn);
+
+       atomic_inc(&dev->num_qps);
+
+       return 0;
+
+err_cmd:
+       /* Roll back: the destroy command gets its own, matching outbox. */
+       memset(&din, 0, sizeof(din));
+       memset(&dout, 0, sizeof(dout));
+       din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
+       din.qpn = cpu_to_be32(qp->qpn);
+       mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
+
+/*
+ * Unregister @qp and destroy it in firmware.
+ *
+ * The QP is first removed from the debug file system and from the QP
+ * table, so no new event can find it.  The function then drops the
+ * reference set by mlx5_core_create_qp() and waits until every
+ * mlx5_qp_event() caller that still holds a reference is done.  Only
+ * after that is DESTROY_QP issued.
+ *
+ * dev->num_qps is decremented only when the command succeeds with a zero
+ * firmware status.
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the translated
+ * firmware status.
+ */
+int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
+                        struct mlx5_core_qp *qp)
+{
+       struct mlx5_qp_table *qpt = &dev->priv.qp_table;
+       struct mlx5_destroy_qp_mbox_out outbox;
+       struct mlx5_destroy_qp_mbox_in inbox;
+       unsigned long irqflags;
+       int ret;
+
+       mlx5_debug_qp_remove(dev, qp);
+
+       spin_lock_irqsave(&qpt->lock, irqflags);
+       radix_tree_delete(&qpt->tree, qp->qpn);
+       spin_unlock_irqrestore(&qpt->lock, irqflags);
+
+       if (atomic_dec_and_test(&qp->refcount))
+               complete(&qp->free);
+       wait_for_completion(&qp->free);
+
+       memset(&outbox, 0, sizeof(outbox));
+       memset(&inbox, 0, sizeof(inbox));
+       inbox.qpn = cpu_to_be32(qp->qpn);
+       inbox.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
+
+       ret = mlx5_cmd_exec(dev, &inbox, sizeof(inbox), &outbox, sizeof(outbox));
+       if (ret)
+               return ret;
+
+       if (outbox.hdr.status)
+               return mlx5_cmd_status_to_err(&outbox.hdr);
+
+       atomic_dec(&dev->num_qps);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
+
+/*
+ * mlx5_core_qp_modify - run the firmware command for a QP state transition.
+ * @dev:       core device used to execute the command
+ * @cur_state: state the QP is currently in
+ * @new_state: state the QP should move to
+ * @in:        caller-prepared mailbox; hdr.opcode and qpn are filled in here
+ * @sqd_event: accepted for the interface but not used by this function
+ * @qp:        QP being modified
+ *
+ * Returns -EINVAL when either state is out of range or the table holds no
+ * opcode for the requested transition, the mlx5_cmd_exec() error if the
+ * command could not be executed, and otherwise whatever
+ * mlx5_cmd_status_to_err() makes of the firmware status.
+ */
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
+                       enum mlx5_qp_state new_state,
+                       struct mlx5_modify_qp_mbox_in *in, int sqd_event,
+                       struct mlx5_core_qp *qp)
+{
+       /* transition_op[from][to]; entries left at zero are not allowed. */
+       static const u16 transition_op[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+               [MLX5_QP_STATE_RST] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
+               },
+               [MLX5_QP_STATE_INIT]  = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
+                       [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
+               },
+               [MLX5_QP_STATE_RTR]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
+               },
+               [MLX5_QP_STATE_RTS]   = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
+                       [MLX5_QP_STATE_SQD]     = MLX5_CMD_OP_RTS2SQD_QP,
+               },
+               [MLX5_QP_STATE_SQD] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQD2RTS_QP,
+                       [MLX5_QP_STATE_SQD]     = MLX5_CMD_OP_SQD2SQD_QP,
+               },
+               [MLX5_QP_STATE_SQER] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+                       [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
+               },
+               [MLX5_QP_STATE_ERR] = {
+                       [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
+                       [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
+               }
+       };
+
+       struct mlx5_modify_qp_mbox_out resp;
+       u16 opcode;
+       int rc;
+
+       /* Range-check both indices before touching the table. */
+       if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE)
+               return -EINVAL;
+
+       opcode = transition_op[cur_state][new_state];
+       if (!opcode)
+               return -EINVAL;
+
+       memset(&resp, 0, sizeof(resp));
+       in->hdr.opcode = cpu_to_be16(opcode);
+       in->qpn = cpu_to_be32(qp->qpn);
+
+       rc = mlx5_cmd_exec(dev, in, sizeof(*in), &resp, sizeof(resp));
+       if (rc)
+               return rc;
+
+       return mlx5_cmd_status_to_err(&resp.hdr);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
+
+/*
+ * Set up the per-device QP table: its lock, its radix tree (nodes allocated
+ * with GFP_ATOMIC) and the QP debugfs support.
+ */
+void mlx5_init_qp_table(struct mlx5_core_dev *dev)
+{
+       struct mlx5_qp_table *qp_table = &dev->priv.qp_table;
+
+       INIT_RADIX_TREE(&qp_table->tree, GFP_ATOMIC);
+       spin_lock_init(&qp_table->lock);
+
+       mlx5_qp_debugfs_init(dev);
+}
+
+/*
+ * Counterpart of mlx5_init_qp_table(): only the QP debugfs support is torn
+ * down here; the lock and the radix tree are left untouched.
+ */
+void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
+{
+       mlx5_qp_debugfs_cleanup(dev);
+}
+
+/*
+ * mlx5_core_qp_query - execute QUERY_QP for @qp.
+ * @dev:    core device used to execute the command
+ * @qp:     QP to query (qp->qpn is sent to firmware)
+ * @out:    caller-provided output mailbox; zeroed here before the command
+ * @outlen: size of @out in bytes
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+                      struct mlx5_query_qp_mbox_out *out, int outlen)
+{
+       struct mlx5_query_qp_mbox_in req;
+       int rc;
+
+       memset(out, 0, outlen);
+       memset(&req, 0, sizeof(req));
+       req.qpn = cpu_to_be32(qp->qpn);
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), out, outlen);
+       if (!rc && out->hdr.status)
+               rc = mlx5_cmd_status_to_err(&out->hdr);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
+
+/*
+ * mlx5_core_xrcd_alloc - execute ALLOC_XRCD and hand back the new number.
+ * @dev:   core device used to execute the command
+ * @xrcdn: written with the allocated XRCD number on success only
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
+{
+       struct mlx5_alloc_xrcd_mbox_out resp;
+       struct mlx5_alloc_xrcd_mbox_in req;
+       int rc;
+
+       memset(&resp, 0, sizeof(resp));
+       memset(&req, 0, sizeof(req));
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), &resp, sizeof(resp));
+       if (rc)
+               return rc;
+
+       if (resp.hdr.status)
+               return mlx5_cmd_status_to_err(&resp.hdr);
+
+       *xrcdn = be32_to_cpu(resp.xrcdn);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
+
+/*
+ * mlx5_core_xrcd_dealloc - execute DEALLOC_XRCD for @xrcdn.
+ * @dev:   core device used to execute the command
+ * @xrcdn: XRCD number to release
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
+{
+       struct mlx5_dealloc_xrcd_mbox_out resp;
+       struct mlx5_dealloc_xrcd_mbox_in req;
+       int rc;
+
+       memset(&resp, 0, sizeof(resp));
+       memset(&req, 0, sizeof(req));
+       req.xrcdn = cpu_to_be32(xrcdn);
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), &resp, sizeof(resp));
+       if (!rc && resp.hdr.status)
+               rc = mlx5_cmd_status_to_err(&resp.hdr);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
new file mode 100644 (file)
index 0000000..38bce93
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/srq.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+/*
+ * mlx5_srq_event - deliver an event to the SRQ registered under @srqn.
+ * @dev:        core device whose SRQ table is searched
+ * @srqn:       SRQ number the event refers to
+ * @event_type: value passed through to the SRQ's event callback
+ *
+ * A reference is taken under the table lock so the SRQ stays valid while
+ * its callback runs; if dropping that reference brings the count to zero,
+ * srq->free is completed.  An unknown @srqn only produces a warning.
+ */
+void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
+{
+       struct mlx5_srq_table *srq_table = &dev->priv.srq_table;
+       struct mlx5_core_srq *target;
+
+       spin_lock(&srq_table->lock);
+       target = radix_tree_lookup(&srq_table->tree, srqn);
+       if (target)
+               atomic_inc(&target->refcount);
+       spin_unlock(&srq_table->lock);
+
+       if (target) {
+               target->event(target, event_type);
+
+               if (atomic_dec_and_test(&target->refcount))
+                       complete(&target->free);
+               return;
+       }
+
+       mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
+}
+
+/*
+ * mlx5_core_get_srq - look up an SRQ by number and take a reference on it.
+ * @dev:  core device whose SRQ table is searched
+ * @srqn: SRQ number to look up
+ *
+ * Returns the SRQ with its refcount incremented, or NULL when nothing is
+ * registered under @srqn.  The matching decrement is not done here; the
+ * caller is presumably expected to drop the reference when finished.
+ */
+struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+{
+       struct mlx5_srq_table *srq_table = &dev->priv.srq_table;
+       struct mlx5_core_srq *found;
+
+       spin_lock(&srq_table->lock);
+       found = radix_tree_lookup(&srq_table->tree, srqn);
+       if (found)
+               atomic_inc(&found->refcount);
+       spin_unlock(&srq_table->lock);
+
+       return found;
+}
+EXPORT_SYMBOL(mlx5_core_get_srq);
+
+/*
+ * mlx5_core_create_srq - create an SRQ in firmware and register it.
+ * @dev:   core device used to execute the command
+ * @srq:   core SRQ object; srqn, refcount and free are initialised here
+ * @in:    caller-prepared CREATE_SRQ mailbox (hdr.opcode is set here)
+ * @inlen: size of @in in bytes
+ *
+ * On success the SRQ is inserted into the device's SRQ radix tree with a
+ * refcount of 1.  If the insertion fails, the freshly created firmware
+ * object is destroyed again (best effort, result ignored) and the insertion
+ * error is returned.
+ */
+int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                        struct mlx5_create_srq_mbox_in *in, int inlen)
+{
+       struct mlx5_srq_table *srq_table = &dev->priv.srq_table;
+       struct mlx5_create_srq_mbox_out resp;
+       struct mlx5_destroy_srq_mbox_in undo_in;
+       struct mlx5_destroy_srq_mbox_out undo_out;
+       int rc;
+
+       memset(&resp, 0, sizeof(resp));
+       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ);
+       rc = mlx5_cmd_exec(dev, in, inlen, &resp, sizeof(resp));
+       if (rc)
+               return rc;
+
+       if (resp.hdr.status)
+               return mlx5_cmd_status_to_err(&resp.hdr);
+
+       /* Only the low 24 bits of the returned word are kept as the number. */
+       srq->srqn = be32_to_cpu(resp.srqn) & 0xffffff;
+
+       /* Make the object fully usable before it becomes visible in the tree. */
+       atomic_set(&srq->refcount, 1);
+       init_completion(&srq->free);
+
+       spin_lock_irq(&srq_table->lock);
+       rc = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
+       spin_unlock_irq(&srq_table->lock);
+       if (!rc)
+               return 0;
+
+       mlx5_core_warn(dev, "err %d, srqn 0x%x\n", rc, srq->srqn);
+
+       /* Registration failed: undo the firmware-side creation. */
+       memset(&undo_in, 0, sizeof(undo_in));
+       memset(&undo_out, 0, sizeof(undo_out));
+       undo_in.srqn = cpu_to_be32(srq->srqn);
+       undo_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
+       mlx5_cmd_exec(dev, &undo_in, sizeof(undo_in), &undo_out, sizeof(undo_out));
+
+       return rc;
+}
+EXPORT_SYMBOL(mlx5_core_create_srq);
+
+/*
+ * mlx5_core_destroy_srq - unregister an SRQ and destroy it in firmware.
+ * @dev: core device whose SRQ table and command interface are used
+ * @srq: SRQ to destroy
+ *
+ * The SRQ is first removed from the radix tree, then DESTROY_SRQ is
+ * executed, and finally one reference is dropped and the function waits on
+ * srq->free.
+ *
+ * Returns 0 on success; -EINVAL if no entry, or a different object, was
+ * stored under srq->srqn; otherwise the mlx5_cmd_exec() error or the error
+ * translated from a non-zero firmware status.  On a command failure the
+ * SRQ has already been removed from the tree and is not re-inserted.
+ */
+int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+       struct mlx5_destroy_srq_mbox_in in;
+       struct mlx5_destroy_srq_mbox_out out;
+       struct mlx5_srq_table *table = &dev->priv.srq_table;
+       struct mlx5_core_srq *tmp;
+       int err;
+
+       /* radix_tree_delete() hands back whatever was stored under the key. */
+       spin_lock_irq(&table->lock);
+       tmp = radix_tree_delete(&table->tree, srq->srqn);
+       spin_unlock_irq(&table->lock);
+       if (!tmp) {
+               mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
+               return -EINVAL;
+       }
+       /* A different object was registered; note it has been deleted above. */
+       if (tmp != srq) {
+               mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+               return -EINVAL;
+       }
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
+       in.srqn = cpu_to_be32(srq->srqn);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err)
+               return err;
+
+       if (out.hdr.status)
+               return mlx5_cmd_status_to_err(&out.hdr);
+
+       /*
+        * Drop one reference.  If it was the last, complete srq->free here;
+        * otherwise mlx5_srq_event() completes it when it drops the final
+        * reference, and the wait below blocks until then.
+        */
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+       wait_for_completion(&srq->free);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_srq);
+
+/*
+ * mlx5_core_query_srq - execute QUERY_SRQ for @srq.
+ * @dev: core device used to execute the command
+ * @srq: SRQ to query (srq->srqn is sent to firmware)
+ * @out: caller-provided output mailbox; zeroed here before the command
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_query_srq_mbox_out *out)
+{
+       struct mlx5_query_srq_mbox_in req;
+       int rc;
+
+       memset(out, 0, sizeof(*out));
+       memset(&req, 0, sizeof(req));
+
+       req.srqn = cpu_to_be32(srq->srqn);
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), out, sizeof(*out));
+       if (!rc && out->hdr.status)
+               rc = mlx5_cmd_status_to_err(&out->hdr);
+
+       return rc;
+}
+EXPORT_SYMBOL(mlx5_core_query_srq);
+
+/*
+ * mlx5_core_arm_srq - execute the ARM_RQ command for @srq.
+ * @dev:    core device used to execute the command
+ * @srq:    SRQ whose number is placed in the mailbox
+ * @lwm:    value written to the mailbox's lwm field (presumably the limit
+ *          watermark - confirm against the device specification)
+ * @is_srq: normalised to 0/1 and sent as the command's opmod
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                     u16 lwm, int is_srq)
+{
+       struct mlx5_arm_srq_mbox_out resp;
+       struct mlx5_arm_srq_mbox_in req;
+       int rc;
+
+       memset(&resp, 0, sizeof(resp));
+       memset(&req, 0, sizeof(req));
+
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ);
+       req.hdr.opmod = cpu_to_be16(!!is_srq);
+       req.lwm = cpu_to_be16(lwm);
+       req.srqn = cpu_to_be32(srq->srqn);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), &resp, sizeof(resp));
+       if (rc)
+               return rc;
+
+       if (resp.hdr.status)
+               return mlx5_cmd_status_to_err(&resp.hdr);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_core_arm_srq);
+
+/*
+ * Set up the per-device SRQ table: its radix tree (nodes allocated with
+ * GFP_ATOMIC) and the spinlock that guards it.
+ */
+void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+{
+       struct mlx5_srq_table *srq_table = &dev->priv.srq_table;
+
+       INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
+       spin_lock_init(&srq_table->lock);
+}
+
+/*
+ * Counterpart of mlx5_init_srq_table().  Intentionally empty: the init
+ * function only initialises a lock and a radix tree root, and nothing is
+ * released here.
+ */
+void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+{
+       /* nothing */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644 (file)
index 0000000..71d4a39
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/*
+ * Sizing constants for the driver's own UARs:
+ *  NUM_DRIVER_UARS   - number of UARs mlx5_alloc_uuars() allocates and maps
+ *  NUM_LOW_LAT_UUARS - value stored in uuari->num_low_latency_uuars and
+ *                      used by need_uuar_lock()
+ */
+enum {
+       NUM_DRIVER_UARS         = 4,
+       NUM_LOW_LAT_UUARS       = 4,
+};
+
+
+/* Input mailbox of the ALLOC_UAR command (see mlx5_cmd_alloc_uar()). */
+struct mlx5_alloc_uar_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+/*
+ * Output mailbox of the ALLOC_UAR command; mlx5_cmd_alloc_uar() keeps only
+ * the low 24 bits of uarn.
+ */
+struct mlx5_alloc_uar_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  uarn;
+       u8                      rsvd[4];
+};
+
+/* Input mailbox of the DEALLOC_UAR command (see mlx5_cmd_free_uar()). */
+struct mlx5_free_uar_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  uarn;
+       u8                      rsvd[4];
+};
+
+/* Output mailbox of the DEALLOC_UAR command; only the header is examined. */
+struct mlx5_free_uar_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+/*
+ * mlx5_cmd_alloc_uar - execute ALLOC_UAR and return the new UAR index.
+ * @dev:  core device used to execute the command
+ * @uarn: written with the low 24 bits of the returned index, on success only
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+{
+       struct mlx5_alloc_uar_mbox_out  resp;
+       struct mlx5_alloc_uar_mbox_in   req;
+       int rc;
+
+       memset(&resp, 0, sizeof(resp));
+       memset(&req, 0, sizeof(req));
+       req.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
+
+       rc = mlx5_cmd_exec(dev, &req, sizeof(req), &resp, sizeof(resp));
+       if (rc)
+               return rc;
+
+       if (resp.hdr.status)
+               return mlx5_cmd_status_to_err(&resp.hdr);
+
+       *uarn = be32_to_cpu(resp.uarn) & 0xffffff;
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
+
+/*
+ * mlx5_cmd_free_uar - execute DEALLOC_UAR for @uarn.
+ * @dev:  core device used to execute the command
+ * @uarn: UAR index to release
+ *
+ * Returns 0 on success, the mlx5_cmd_exec() error, or the error translated
+ * from a non-zero firmware status.
+ */
+int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+{
+       struct mlx5_free_uar_mbox_in    in;
+       struct mlx5_free_uar_mbox_out   out;
+       int err;
+
+       memset(&in, 0, sizeof(in));
+       /*
+        * Zero the output mailbox like every other command wrapper does, so
+        * that out.hdr.status is never read from uninitialised stack memory.
+        */
+       memset(&out, 0, sizeof(out));
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR);
+       in.uarn = cpu_to_be32(uarn);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err)
+               goto ex;
+
+       if (out.hdr.status)
+               err = mlx5_cmd_status_to_err(&out.hdr);
+
+ex:
+       return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_free_uar);
+
+/*
+ * need_uuar_lock - tell whether users of uuar @uuarn must take its lock.
+ * @uuarn: index into the driver's uuars, 0 .. tot_uuars - 1
+ *
+ * Returns 0 for uuar 0 and for the last NUM_LOW_LAT_UUARS uuars, 1 for all
+ * others.  (Presumably the exempt ones are not shared between users -
+ * confirm against the uuar allocation policy of the callers.)
+ *
+ * The second operand used to be the bare constant
+ * "tot_uuars - NUM_LOW_LAT_UUARS", which is always non-zero, so the function
+ * returned 0 for every index; it must be compared against @uuarn.
+ */
+static int need_uuar_lock(int uuarn)
+{
+       int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
+
+       if (uuarn == 0 || uuarn >= tot_uuars - NUM_LOW_LAT_UUARS)
+               return 0;
+
+       return 1;
+}
+
+/*
+ * mlx5_alloc_uuars - allocate and map the driver's UARs and set up the
+ * per-uuar descriptors (struct mlx5_bf).
+ * @dev:   core device; provides the command interface, iseg_base and caps
+ * @uuari: uuar bookkeeping structure to fill in
+ *
+ * Allocates the uars, bfs, bitmap and count arrays, obtains NUM_DRIVER_UARS
+ * UARs from firmware, maps one page for each of them and initialises
+ * NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE descriptors.
+ *
+ * Returns 0 on success.  On failure everything acquired so far is released
+ * and a negative errno is returned: -ENOMEM for a failed allocation or
+ * ioremap(), or the error reported by mlx5_cmd_alloc_uar().
+ */
+int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+{
+       int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
+       struct mlx5_bf *bf;
+       phys_addr_t addr;
+       int err;
+       int i;
+
+       uuari->num_uars = NUM_DRIVER_UARS;
+       uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS;
+
+       mutex_init(&uuari->lock);
+       uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL);
+       if (!uuari->uars)
+               return -ENOMEM;
+
+       uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL);
+       if (!uuari->bfs) {
+               err = -ENOMEM;
+               goto out_uars;
+       }
+
+       uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap),
+                               GFP_KERNEL);
+       if (!uuari->bitmap) {
+               err = -ENOMEM;
+               goto out_bfs;
+       }
+
+       uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL);
+       if (!uuari->count) {
+               err = -ENOMEM;
+               goto out_bitmap;
+       }
+
+       for (i = 0; i < uuari->num_uars; i++) {
+               err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index);
+               if (err)
+                       goto out_count;
+
+               /* Each UAR occupies one page at iseg_base + index * PAGE_SIZE. */
+               addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT);
+               uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
+               if (!uuari->uars[i].map) {
+                       /*
+                        * UAR i is released here because the unwind loop
+                        * below only covers indices 0 .. i - 1.  err is
+                        * still 0 from the successful allocation above, so
+                        * it must be set or the caller would see success
+                        * with every array already freed.
+                        */
+                       mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+                       err = -ENOMEM;
+                       goto out_count;
+               }
+               mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
+                             uuari->uars[i].index, uuari->uars[i].map);
+       }
+
+       for (i = 0; i < tot_uuars; i++) {
+               bf = &uuari->bfs[i];
+
+               bf->buf_size = dev->caps.bf_reg_size / 2;
+               /* MLX5_BF_REGS_PER_PAGE consecutive descriptors share a UAR. */
+               bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
+               bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
+               bf->reg = NULL; /* Add WC support */
+               bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size +
+                       MLX5_BF_OFFSET;
+               bf->need_lock = need_uuar_lock(i);
+               spin_lock_init(&bf->lock);
+               spin_lock_init(&bf->lock32);
+               bf->uuarn = i;
+       }
+
+       return 0;
+
+out_count:
+       /* Unwind the UARs that were fully set up before the failing index. */
+       for (i--; i >= 0; i--) {
+               iounmap(uuari->uars[i].map);
+               mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+       }
+       kfree(uuari->count);
+
+out_bitmap:
+       kfree(uuari->bitmap);
+
+out_bfs:
+       kfree(uuari->bfs);
+
+out_uars:
+       kfree(uuari->uars);
+       return err;
+}
+
+/*
+ * mlx5_free_uuars - release everything set up by mlx5_alloc_uuars().
+ * @dev:   core device used for the DEALLOC_UAR commands
+ * @uuari: uuar bookkeeping structure to tear down
+ *
+ * Walks the UARs from the highest index down to 0, unmapping each and
+ * returning it to firmware (command results are ignored), then frees the
+ * bookkeeping arrays.  Always returns 0.
+ */
+int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+{
+       int idx = uuari->num_uars;
+
+       while (idx-- > 0) {
+               iounmap(uuari->uars[idx].map);
+               mlx5_cmd_free_uar(dev, uuari->uars[idx].index);
+       }
+
+       kfree(uuari->count);
+       kfree(uuari->bitmap);
+       kfree(uuari->bfs);
+       kfree(uuari->uars);
+
+       return 0;
+}
diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h
new file mode 100644 (file)
index 0000000..2826a4b
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_CMD_H
+#define MLX5_CMD_H
+
+#include <linux/types.h>
+
+/*
+ * NOTE(review): not referenced anywhere in the visible part of this patch;
+ * going by the name this describes a manage-pages command layout - confirm
+ * field meanings and byte order against its users.
+ */
+struct manage_pages_layout {
+       u64     ptr;
+       u32     reserved;
+       u16     num_entries;
+       u16     func_id;
+};
+
+
+/*
+ * Three reserved words followed by a UAR number.
+ * NOTE(review): not referenced in the visible part of this patch; uar.c
+ * uses its own struct mlx5_alloc_uar_mbox_out instead - confirm whether
+ * this definition is still needed.
+ */
+struct mlx5_cmd_alloc_uar_imm_out {
+       u32     rsvd[3];
+       u32     uarn;
+};
+
+#endif /* MLX5_CMD_H */
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
new file mode 100644 (file)
index 0000000..3db67f7
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_CORE_CQ_H
+#define MLX5_CORE_CQ_H
+
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/driver.h>
+
+
+/*
+ * Core-level state of a completion queue.  Field notes below are limited to
+ * what the inline helpers in this header do with them.
+ */
+struct mlx5_core_cq {
+       /* CQ number; written into the second doorbell word by mlx5_cq_arm(). */
+       u32                     cqn;
+       int                     cqe_sz;
+       /* Doorbell record updated by mlx5_cq_set_ci(). */
+       __be32                 *set_ci_db;
+       /* Doorbell record updated by mlx5_cq_arm(). */
+       __be32                 *arm_db;
+       /*
+        * NOTE(review): presumably the same reference/completion scheme the
+        * QP and SRQ code uses for teardown - confirm in cq.c.
+        */
+       atomic_t                refcount;
+       struct completion       free;
+       unsigned                vector;
+       int                     irqn;
+       /* Callbacks supplied by the CQ's owner. */
+       void (*comp)            (struct mlx5_core_cq *);
+       void (*event)           (struct mlx5_core_cq *, enum mlx5_event);
+       struct mlx5_uar        *uar;
+       /* Consumer index; the helpers use its low 24 bits. */
+       u32                     cons_index;
+       /* Arm sequence number; mlx5_cq_arm() uses its low 2 bits. */
+       unsigned                arm_sn;
+       struct mlx5_rsc_debug   *dbg;
+       int                     pid;
+};
+
+
+/*
+ * CQE error syndrome values.  NOTE(review): the meanings are taken from the
+ * constant names only; the code that decodes them is outside this header.
+ */
+enum {
+       MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR              = 0x01,
+       MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR               = 0x02,
+       MLX5_CQE_SYNDROME_LOCAL_PROT_ERR                = 0x04,
+       MLX5_CQE_SYNDROME_WR_FLUSH_ERR                  = 0x05,
+       MLX5_CQE_SYNDROME_MW_BIND_ERR                   = 0x06,
+       MLX5_CQE_SYNDROME_BAD_RESP_ERR                  = 0x10,
+       MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR              = 0x11,
+       MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR          = 0x12,
+       MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR             = 0x13,
+       MLX5_CQE_SYNDROME_REMOTE_OP_ERR                 = 0x14,
+       MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR       = 0x15,
+       MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR             = 0x16,
+       MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR            = 0x22,
+};
+
+/*
+ * MLX5_CQE_OWNER_MASK is a bit mask; the remaining constants are distinct
+ * CQE type values (MLX5_CQE_RESP_WR_IMM happens to share the value 1 with
+ * the mask).  NOTE(review): which CQE field each applies to is not visible
+ * from this header.
+ */
+enum {
+       MLX5_CQE_OWNER_MASK     = 1,
+       MLX5_CQE_REQ            = 0,
+       MLX5_CQE_RESP_WR_IMM    = 1,
+       MLX5_CQE_RESP_SEND      = 2,
+       MLX5_CQE_RESP_SEND_IMM  = 3,
+       MLX5_CQE_RESP_SEND_INV  = 4,
+       MLX5_CQE_RESIZE_CQ      = 0xff, /* TBD */
+       MLX5_CQE_REQ_ERR        = 13,
+       MLX5_CQE_RESP_ERR       = 14,
+};
+
+/*
+ * Kinds of CQ modification (presumably the values for the "type" argument
+ * of mlx5_core_modify_cq() - confirm in cq.c).  The spelling "RESEIZE" is
+ * part of the identifier and is kept as is.
+ */
+enum {
+       MLX5_CQ_MODIFY_RESEIZE = 0,
+       MLX5_CQ_MODIFY_MODER = 1,
+       MLX5_CQ_MODIFY_MAPPING = 2,
+};
+
+/*
+ * Parameters for a CQ modification.  Only the resize variant carries data;
+ * the moder and mapping variants are empty placeholders.
+ */
+struct mlx5_cq_modify_params {
+       /* Selects the union member; presumably one of MLX5_CQ_MODIFY_*. */
+       int     type;
+       union {
+               struct {
+                       u32     page_offset;
+                       u8      log_cq_size;
+               } resize;
+
+               struct {
+               } moder;
+
+               struct {
+               } mapping;
+       } params;
+};
+
+/* Encoded CQE sizes, as returned by cqe_sz_to_mlx_sz(). */
+enum {
+       CQE_SIZE_64 = 0,
+       CQE_SIZE_128 = 1,
+};
+
+/*
+ * Translate a CQE size in bytes into its encoded form: 64 maps to
+ * CQE_SIZE_64, every other value maps to CQE_SIZE_128.
+ */
+static inline int cqe_sz_to_mlx_sz(u8 size)
+{
+       if (size == 64)
+               return CQE_SIZE_64;
+
+       return CQE_SIZE_128;
+}
+
+/*
+ * Publish the CQ's consumer index: the low 24 bits of cq->cons_index are
+ * stored, big-endian, in the set_ci doorbell record.
+ */
+static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq)
+{
+       u32 ci = cq->cons_index & 0xffffff;
+
+       *cq->set_ci_db = cpu_to_be32(ci);
+}
+
+/*
+ * Values occupying bit 24 of the arm doorbell word; presumably what callers
+ * pass as the "cmd" argument of mlx5_cq_arm() - confirm at the call sites.
+ */
+enum {
+       MLX5_CQ_DB_REQ_NOT_SOL          = 1 << 24,
+       MLX5_CQ_DB_REQ_NOT              = 0 << 24
+};
+
+/*
+ * mlx5_cq_arm - arm a CQ by updating its arm doorbell record and ringing
+ * the CQ doorbell.
+ * @cq:            CQ to arm
+ * @cmd:           bits OR'ed into the doorbell word together with the
+ *                 sequence number and the consumer index
+ * @uar_page:      mapped UAR page; the doorbell is written at offset
+ *                 MLX5_CQ_DOORBELL
+ * @doorbell_lock: lock handed on to mlx5_write64()
+ *
+ * The doorbell word is (arm_sn & 3) << 28 | cmd | (cons_index & 0xffffff).
+ * It is first stored in the arm doorbell record and then, after a write
+ * barrier, written together with the CQ number through mlx5_write64().
+ */
+static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
+                              void __iomem *uar_page,
+                              spinlock_t *doorbell_lock)
+{
+       __be32 doorbell[2];
+       u32 sn;
+       u32 ci;
+
+       sn = cq->arm_sn & 3;
+       ci = cq->cons_index & 0xffffff;
+
+       *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
+
+       /* Make sure that the doorbell record in host memory is
+        * written before ringing the doorbell via PCI MMIO.
+        */
+       wmb();
+
+       doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
+       doorbell[1] = cpu_to_be32(cq->cqn);
+
+       mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock);
+}
+
+/*
+ * Core CQ interface: table setup/teardown, CQ create/destroy/query/modify
+ * and debugfs registration.  The definitions are not in this header.
+ */
+int mlx5_init_cq_table(struct mlx5_core_dev *dev);
+void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                       struct mlx5_create_cq_mbox_in *in, int inlen);
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                      struct mlx5_query_cq_mbox_out *out);
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+                       int type, struct mlx5_cq_modify_params *params);
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
+
+#endif /* MLX5_CORE_CQ_H */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
new file mode 100644 (file)
index 0000000..5139091
--- /dev/null
@@ -0,0 +1,893 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_DEVICE_H
+#define MLX5_DEVICE_H
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+
+#if defined(__LITTLE_ENDIAN)
+#define MLX5_SET_HOST_ENDIANNESS       0
+#elif defined(__BIG_ENDIAN)
+#define MLX5_SET_HOST_ENDIANNESS       0x80
+#else
+#error Host endianness not defined
+#endif
+
+enum {
+       MLX5_MAX_COMMANDS               = 32,
+       MLX5_CMD_DATA_BLOCK_SIZE        = 512,
+       MLX5_PCI_CMD_XPORT              = 7,
+};
+
+enum {
+       MLX5_EXTENDED_UD_AV             = 0x80000000,
+};
+
+enum {
+       MLX5_CQ_STATE_ARMED             = 0x9,
+       MLX5_CQ_STATE_FIRED             = 0xa,
+       MLX5_CQ_STATE_ALWAYS_ARMED      = 0xb,
+};
+
+enum {
+       MLX5_STAT_RATE_OFFSET   = 5,
+};
+
+enum {
+       MLX5_INLINE_SEG = 0x80000000,
+};
+
+enum {
+       MLX5_PERM_LOCAL_READ    = 1 << 2,
+       MLX5_PERM_LOCAL_WRITE   = 1 << 3,
+       MLX5_PERM_REMOTE_READ   = 1 << 4,
+       MLX5_PERM_REMOTE_WRITE  = 1 << 5,
+       MLX5_PERM_ATOMIC        = 1 << 6,
+       MLX5_PERM_UMR_EN        = 1 << 7,
+};
+
+enum {
+       MLX5_PCIE_CTRL_SMALL_FENCE      = 1 << 0,
+       MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2,
+       MLX5_PCIE_CTRL_NO_SNOOP         = 1 << 3,
+       MLX5_PCIE_CTRL_TLP_PROCE_EN     = 1 << 6,
+       MLX5_PCIE_CTRL_TPH_MASK         = 3 << 4,
+};
+
+enum {
+       MLX5_ACCESS_MODE_PA     = 0,
+       MLX5_ACCESS_MODE_MTT    = 1,
+       MLX5_ACCESS_MODE_KLM    = 2
+};
+
+enum { /* single-bit mkey flags; all fit in 32 bits */
+       MLX5_MKEY_REMOTE_INVAL  = 1 << 24,
+       MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
+       MLX5_MKEY_BSF_EN        = 1 << 30,
+       MLX5_MKEY_LEN64         = 1U << 31, /* unsigned: 1 << 31 overflows int */
+};
+
+enum {
+       MLX5_EN_RD      = 1 << 0,
+       MLX5_EN_WR      = 1 << 1
+};
+
+enum {
+       MLX5_BF_REGS_PER_PAGE   = 4,
+       MLX5_MAX_UAR_PAGES      = 1 << 8,
+       MLX5_MAX_UUARS          = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
+};
+
+enum {
+       MLX5_MKEY_MASK_LEN              = 1ull << 0,
+       MLX5_MKEY_MASK_PAGE_SIZE        = 1ull << 1,
+       MLX5_MKEY_MASK_START_ADDR       = 1ull << 6,
+       MLX5_MKEY_MASK_PD               = 1ull << 7,
+       MLX5_MKEY_MASK_EN_RINVAL        = 1ull << 8,
+       MLX5_MKEY_MASK_BSF_EN           = 1ull << 12,
+       MLX5_MKEY_MASK_KEY              = 1ull << 13,
+       MLX5_MKEY_MASK_QPN              = 1ull << 14,
+       MLX5_MKEY_MASK_LR               = 1ull << 17,
+       MLX5_MKEY_MASK_LW               = 1ull << 18,
+       MLX5_MKEY_MASK_RR               = 1ull << 19,
+       MLX5_MKEY_MASK_RW               = 1ull << 20,
+       MLX5_MKEY_MASK_A                = 1ull << 21,
+       MLX5_MKEY_MASK_SMALL_FENCE      = 1ull << 23,
+       MLX5_MKEY_MASK_FREE             = 1ull << 29,
+};
+
+enum mlx5_event {
+       MLX5_EVENT_TYPE_COMP               = 0x00,
+
+       MLX5_EVENT_TYPE_PATH_MIG           = 0x01,
+       MLX5_EVENT_TYPE_COMM_EST           = 0x02,
+       MLX5_EVENT_TYPE_SQ_DRAINED         = 0x03,
+       MLX5_EVENT_TYPE_SRQ_LAST_WQE       = 0x13,
+       MLX5_EVENT_TYPE_SRQ_RQ_LIMIT       = 0x14,
+
+       MLX5_EVENT_TYPE_CQ_ERROR           = 0x04,
+       MLX5_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
+       MLX5_EVENT_TYPE_PATH_MIG_FAILED    = 0x07,
+       MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
+       MLX5_EVENT_TYPE_WQ_ACCESS_ERROR    = 0x11,
+       MLX5_EVENT_TYPE_SRQ_CATAS_ERROR    = 0x12,
+
+       MLX5_EVENT_TYPE_INTERNAL_ERROR     = 0x08,
+       MLX5_EVENT_TYPE_PORT_CHANGE        = 0x09,
+       MLX5_EVENT_TYPE_GPIO_EVENT         = 0x15,
+       MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
+
+       MLX5_EVENT_TYPE_DB_BF_CONGESTION   = 0x1a,
+       MLX5_EVENT_TYPE_STALL_EVENT        = 0x1b,
+
+       MLX5_EVENT_TYPE_CMD                = 0x0a,
+       MLX5_EVENT_TYPE_PAGE_REQUEST       = 0x0b,
+};
+
+enum {
+       MLX5_PORT_CHANGE_SUBTYPE_DOWN           = 1,
+       MLX5_PORT_CHANGE_SUBTYPE_ACTIVE         = 4,
+       MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED    = 5,
+       MLX5_PORT_CHANGE_SUBTYPE_LID            = 6,
+       MLX5_PORT_CHANGE_SUBTYPE_PKEY           = 7,
+       MLX5_PORT_CHANGE_SUBTYPE_GUID           = 8,
+       MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG   = 9,
+};
+
+enum {
+       MLX5_DEV_CAP_FLAG_RC            = 1LL <<  0,
+       MLX5_DEV_CAP_FLAG_UC            = 1LL <<  1,
+       MLX5_DEV_CAP_FLAG_UD            = 1LL <<  2,
+       MLX5_DEV_CAP_FLAG_XRC           = 1LL <<  3,
+       MLX5_DEV_CAP_FLAG_SRQ           = 1LL <<  6,
+       MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL <<  8,
+       MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL <<  9,
+       MLX5_DEV_CAP_FLAG_APM           = 1LL << 17,
+       MLX5_DEV_CAP_FLAG_ATOMIC        = 1LL << 18,
+       MLX5_DEV_CAP_FLAG_ON_DMND_PG    = 1LL << 24,
+       MLX5_DEV_CAP_FLAG_RESIZE_SRQ    = 1LL << 32,
+       MLX5_DEV_CAP_FLAG_REMOTE_FENCE  = 1LL << 38,
+       MLX5_DEV_CAP_FLAG_TLP_HINTS     = 1LL << 39,
+       MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40,
+       MLX5_DEV_CAP_FLAG_DCT           = 1LL << 41,
+       MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 1LL << 46,
+};
+
+enum {
+       MLX5_OPCODE_NOP                 = 0x00,
+       MLX5_OPCODE_SEND_INVAL          = 0x01,
+       MLX5_OPCODE_RDMA_WRITE          = 0x08,
+       MLX5_OPCODE_RDMA_WRITE_IMM      = 0x09,
+       MLX5_OPCODE_SEND                = 0x0a,
+       MLX5_OPCODE_SEND_IMM            = 0x0b,
+       MLX5_OPCODE_RDMA_READ           = 0x10,
+       MLX5_OPCODE_ATOMIC_CS           = 0x11,
+       MLX5_OPCODE_ATOMIC_FA           = 0x12,
+       MLX5_OPCODE_ATOMIC_MASKED_CS    = 0x14,
+       MLX5_OPCODE_ATOMIC_MASKED_FA    = 0x15,
+       MLX5_OPCODE_BIND_MW             = 0x18,
+       MLX5_OPCODE_CONFIG_CMD          = 0x1f,
+
+       MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
+       MLX5_RECV_OPCODE_SEND           = 0x01,
+       MLX5_RECV_OPCODE_SEND_IMM       = 0x02,
+       MLX5_RECV_OPCODE_SEND_INVAL     = 0x03,
+
+       MLX5_CQE_OPCODE_ERROR           = 0x1e,
+       MLX5_CQE_OPCODE_RESIZE          = 0x16,
+
+       MLX5_OPCODE_SET_PSV             = 0x20,
+       MLX5_OPCODE_GET_PSV             = 0x21,
+       MLX5_OPCODE_CHECK_PSV           = 0x22,
+       MLX5_OPCODE_RGET_PSV            = 0x26,
+       MLX5_OPCODE_RCHECK_PSV          = 0x27,
+
+       MLX5_OPCODE_UMR                 = 0x25,
+
+};
+
+enum {
+       MLX5_SET_PORT_RESET_QKEY        = 0,
+       MLX5_SET_PORT_GUID0             = 16,
+       MLX5_SET_PORT_NODE_GUID         = 17,
+       MLX5_SET_PORT_SYS_GUID          = 18,
+       MLX5_SET_PORT_GID_TABLE         = 19,
+       MLX5_SET_PORT_PKEY_TABLE        = 20,
+};
+
+enum {
+       MLX5_MAX_PAGE_SHIFT             = 31
+};
+
+struct mlx5_inbox_hdr {
+       __be16          opcode;
+       u8              rsvd[4];
+       __be16          opmod;
+};
+
+struct mlx5_outbox_hdr {
+       u8              status;
+       u8              rsvd[3];
+       __be32          syndrome;
+};
+
+struct mlx5_cmd_query_adapter_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_cmd_query_adapter_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[24];
+       u8                      intapin;
+       u8                      rsvd1[13];
+       __be16                  vsd_vendor_id;
+       u8                      vsd[208];
+       u8                      vsd_psid[16];
+};
+
+struct mlx5_hca_cap { /* embedded in the query/set HCA cap mailboxes */
+       u8      rsvd1[16];
+       u8      log_max_srq_sz;
+       u8      log_max_qp_sz;
+       u8      rsvd2;
+       u8      log_max_qp;
+       u8      log_max_strq_sz;
+       u8      log_max_srqs;
+       u8      rsvd4[2];
+       u8      rsvd5;
+       u8      log_max_cq_sz;
+       u8      rsvd6;
+       u8      log_max_cq;
+       u8      log_max_eq_sz;
+       u8      log_max_mkey;
+       u8      rsvd7;
+       u8      log_max_eq;
+       u8      max_indirection;
+       u8      log_max_mrw_sz;
+       u8      log_max_bsf_list_sz;
+       u8      log_max_klm_list_sz;
+       u8      rsvd_8_0;
+       u8      log_max_ra_req_dc;
+       u8      rsvd_8_1;
+       u8      log_max_ra_res_dc;
+       u8      rsvd9;
+       u8      log_max_ra_req_qp;
+       u8      rsvd10;
+       u8      log_max_ra_res_qp;
+       u8      rsvd11[4];
+       __be16  max_qp_count;
+       __be16  rsvd12;
+       u8      rsvd13;
+       u8      local_ca_ack_delay;
+       u8      rsvd14;
+       u8      num_ports;
+       u8      log_max_msg;
+       u8      rsvd15[3];
+       __be16  stat_rate_support;
+       u8      rsvd16[2];
+       __be64  flags;
+       u8      rsvd17;
+       u8      uar_sz;
+       u8      rsvd18;
+       u8      log_pg_sz;
+       __be16  bf_log_bf_reg_size;
+       u8      rsvd19[4];
+       __be16  max_desc_sz_sq;
+       u8      rsvd20[2];
+       __be16  max_desc_sz_rq;
+       u8      rsvd21[2];
+       __be16  max_desc_sz_sq_dc;
+       u8      rsvd22[4];
+       __be16  max_qp_mcg;
+       u8      rsvd23;
+       u8      log_max_mcg;
+       u8      rsvd24;
+       u8      log_max_pd;
+       u8      rsvd25;
+       u8      log_max_xrcd;
+       u8      rsvd26[40];
+       __be32  uar_page_sz;
+       u8      rsvd27[28];
+       u8      log_msx_atomic_size_qp; /* NOTE(review): "msx" - likely "max" */
+       u8      rsvd28[2];
+       u8      log_msx_atomic_size_dc; /* NOTE(review): "msx" - likely "max" */
+       u8      rsvd29[76];
+};
+
+
+struct mlx5_cmd_query_hca_cap_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+
+struct mlx5_cmd_query_hca_cap_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+       struct mlx5_hca_cap     hca_cap;
+};
+
+
+struct mlx5_cmd_set_hca_cap_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+       struct mlx5_hca_cap     hca_cap;
+};
+
+
+struct mlx5_cmd_set_hca_cap_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+};
+
+
+struct mlx5_cmd_init_hca_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd0[2];
+       __be16                  profile;
+       u8                      rsvd1[4];
+};
+
+struct mlx5_cmd_init_hca_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_cmd_teardown_hca_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd0[2];
+       __be16                  profile;
+       u8                      rsvd1[4];
+};
+
+struct mlx5_cmd_teardown_hca_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_cmd_layout {
+       u8              type;
+       u8              rsvd0[3];
+       __be32          inlen;
+       __be64          in_ptr;
+       __be32          in[4];
+       __be32          out[4];
+       __be64          out_ptr;
+       __be32          outlen;
+       u8              token;
+       u8              sig;
+       u8              rsvd1;
+       u8              status_own;
+};
+
+
+struct health_buffer { /* embedded as .health in struct mlx5_init_seg */
+       __be32          assert_var[5];
+       __be32          rsvd0[3];
+       __be32          assert_exit_ptr;
+       __be32          assert_callra;
+       __be32          rsvd1[2];
+       __be32          fw_ver;
+       __be32          hw_id;
+       __be32          rsvd2;
+       u8              irisc_index;
+       u8              synd;
+       __be16          ext_sync; /* NOTE(review): maybe "ext_synd" - confirm */
+};
+
+struct mlx5_init_seg {
+       __be32                  fw_rev;
+       __be32                  cmdif_rev_fw_sub;
+       __be32                  rsvd0[2];
+       __be32                  cmdq_addr_h;
+       __be32                  cmdq_addr_l_sz;
+       __be32                  cmd_dbell;
+       __be32                  rsvd1[121];
+       struct health_buffer    health;
+       __be32                  rsvd2[884];
+       __be32                  health_counter;
+       __be32                  rsvd3[1023];
+       __be64                  ieee1588_clk;
+       __be32                  ieee1588_clk_type;
+       __be32                  clr_intx;
+};
+
+struct mlx5_eqe_comp {
+       __be32  reserved[6];
+       __be32  cqn;
+};
+
+struct mlx5_eqe_qp_srq {
+       __be32  reserved[6];
+       __be32  qp_srq_n;
+};
+
+struct mlx5_eqe_cq_err {
+       __be32  cqn;
+       u8      reserved1[7];
+       u8      syndrome;
+};
+
+struct mlx5_eqe_dropped_packet { /* no fields: empty struct is a GNU C extension */
+};
+
+struct mlx5_eqe_port_state {
+       u8      reserved0[8];
+       u8      port;
+};
+
+struct mlx5_eqe_gpio {
+       __be32  reserved0[2];
+       __be64  gpio_event;
+};
+
+struct mlx5_eqe_congestion {
+       u8      type;
+       u8      rsvd0;
+       u8      congestion_level;
+};
+
+struct mlx5_eqe_stall_vl {
+       u8      rsvd0[3];
+       u8      port_vl;
+};
+
+struct mlx5_eqe_cmd {
+       __be32  vector;
+       __be32  rsvd[6];
+};
+
+struct mlx5_eqe_page_req {
+       u8              rsvd0[2];
+       __be16          func_id;
+       u8              rsvd1[2];
+       __be16          num_pages;
+       __be32          rsvd2[5];
+};
+
+union ev_data {
+       __be32                          raw[7];
+       struct mlx5_eqe_cmd             cmd;
+       struct mlx5_eqe_comp            comp;
+       struct mlx5_eqe_qp_srq          qp_srq;
+       struct mlx5_eqe_cq_err          cq_err;
+       struct mlx5_eqe_dropped_packet  dp;
+       struct mlx5_eqe_port_state      port;
+       struct mlx5_eqe_gpio            gpio;
+       struct mlx5_eqe_congestion      cong;
+       struct mlx5_eqe_stall_vl        stall_vl;
+       struct mlx5_eqe_page_req        req_pages;
+} __packed;
+
+struct mlx5_eqe {
+       u8              rsvd0;
+       u8              type;
+       u8              rsvd1;
+       u8              sub_type;
+       __be32          rsvd2[7];
+       union ev_data   data;
+       __be16          rsvd3;
+       u8              signature;
+       u8              owner;
+} __packed;
+
+struct mlx5_cmd_prot_block {
+       u8              data[MLX5_CMD_DATA_BLOCK_SIZE];
+       u8              rsvd0[48];
+       __be64          next;
+       __be32          block_num;
+       u8              rsvd1;
+       u8              token;
+       u8              ctrl_sig;
+       u8              sig;
+};
+
+struct mlx5_err_cqe {
+       u8      rsvd0[32];
+       __be32  srqn;
+       u8      rsvd1[18];
+       u8      vendor_err_synd;
+       u8      syndrome;
+       __be32  s_wqe_opcode_qpn;
+       __be16  wqe_counter;
+       u8      signature;
+       u8      op_own;
+};
+
+struct mlx5_cqe64 {
+       u8              rsvd0[17];
+       u8              ml_path;
+       u8              rsvd20[4];
+       __be16          slid;
+       __be32          flags_rqpn;
+       u8              rsvd28[4];
+       __be32          srqn;
+       __be32          imm_inval_pkey;
+       u8              rsvd40[4];
+       __be32          byte_cnt;
+       __be64          timestamp;
+       __be32          sop_drop_qpn;
+       __be16          wqe_counter;
+       u8              signature;
+       u8              op_own;
+};
+
+struct mlx5_wqe_srq_next_seg {
+       u8                      rsvd0[2];
+       __be16                  next_wqe_index;
+       u8                      signature;
+       u8                      rsvd1[11];
+};
+
+union mlx5_ext_cqe {
+       struct ib_grh   grh;
+       u8              inl[64];
+};
+
+struct mlx5_cqe128 {
+       union mlx5_ext_cqe      inl_grh;
+       struct mlx5_cqe64       cqe64;
+};
+
+struct mlx5_srq_ctx {
+       u8                      state_log_sz;
+       u8                      rsvd0[3];
+       __be32                  flags_xrcd;
+       __be32                  pgoff_cqn;
+       u8                      rsvd1[4];
+       u8                      log_pg_sz;
+       u8                      rsvd2[7];
+       __be32                  pd;
+       __be16                  lwm;
+       __be16                  wqe_cnt;
+       u8                      rsvd3[8];
+       __be64                  db_record;
+};
+
+struct mlx5_create_srq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  input_srqn;
+       u8                      rsvd0[4];
+       struct mlx5_srq_ctx     ctx;
+       u8                      rsvd1[208];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_create_srq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  srqn;
+       u8                      rsvd[4];
+};
+
+struct mlx5_destroy_srq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  srqn;
+       u8                      rsvd[4];
+};
+
+struct mlx5_destroy_srq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_query_srq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  srqn;
+       u8                      rsvd0[4];
+};
+
+struct mlx5_query_srq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+       struct mlx5_srq_ctx     ctx;
+       u8                      rsvd1[32];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_arm_srq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  srqn;
+       __be16                  rsvd;
+       __be16                  lwm;
+};
+
+struct mlx5_arm_srq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_cq_context {
+       u8                      status;
+       u8                      cqe_sz_flags;
+       u8                      st;
+       u8                      rsvd3;
+       u8                      rsvd4[6];
+       __be16                  page_offset;
+       __be32                  log_sz_usr_page;
+       __be16                  cq_period;
+       __be16                  cq_max_count;
+       __be16                  rsvd20;
+       __be16                  c_eqn;
+       u8                      log_pg_sz;
+       u8                      rsvd25[7];
+       __be32                  last_notified_index;
+       __be32                  solicit_producer_index;
+       __be32                  consumer_counter;
+       __be32                  producer_counter;
+       u8                      rsvd48[8];
+       __be64                  db_record_addr;
+};
+
+struct mlx5_create_cq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  input_cqn;
+       u8                      rsvdx[4];
+       struct mlx5_cq_context  ctx;
+       u8                      rsvd6[192];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_create_cq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  cqn;
+       u8                      rsvd0[4];
+};
+
+struct mlx5_destroy_cq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  cqn;
+       u8                      rsvd0[4];
+};
+
+struct mlx5_destroy_cq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+};
+
+struct mlx5_query_cq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  cqn;
+       u8                      rsvd0[4];
+};
+
+struct mlx5_query_cq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+       struct mlx5_cq_context  ctx;
+       u8                      rsvd6[16];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_eq_context { /* embedded as .ctx in the create/query EQ mailboxes */
+       u8                      status;
+       u8                      ec_oi;
+       u8                      st;
+       u8                      rsvd2[7];
+       __be16                  page_pffset; /* NOTE(review): sic - "page_offset"? */
+       __be32                  log_sz_usr_page;
+       u8                      rsvd3[7];
+       u8                      intr;
+       u8                      log_page_size;
+       u8                      rsvd4[15];
+       __be32                  consumer_counter;
+       __be32                  produser_counter; /* NOTE(review): sic - "producer"? */
+       u8                      rsvd5[16];
+};
+
+struct mlx5_create_eq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd0[3];
+       u8                      input_eqn;
+       u8                      rsvd1[4];
+       struct mlx5_eq_context  ctx;
+       u8                      rsvd2[8];
+       __be64                  events_mask;
+       u8                      rsvd3[176];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_create_eq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[3];
+       u8                      eq_number;
+       u8                      rsvd1[4];
+};
+
+struct mlx5_destroy_eq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd0[3];
+       u8                      eqn;
+       u8                      rsvd1[4];
+};
+
+struct mlx5_destroy_eq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_map_eq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be64                  mask;
+       u8                      mu;
+       u8                      rsvd0[2];
+       u8                      eqn;
+       u8                      rsvd1[24];
+};
+
+struct mlx5_map_eq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_query_eq_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd0[3];
+       u8                      eqn;
+       u8                      rsvd1[4];
+};
+
+struct mlx5_query_eq_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+       struct mlx5_eq_context  ctx;
+};
+
+struct mlx5_mkey_seg {
+       /* This is a two bit field occupying bits 31-30.
+        * bit 31 is always 0,
+        * bit 30 is zero for regular MRs and 1 (i.e. free) for UMRs that do not have translation
+        */
+       u8              status; /* NOTE(review): presumably bits 7-6 of this byte - confirm */
+       u8              pcie_control;
+       u8              flags;
+       u8              version;
+       __be32          qpn_mkey7_0;
+       u8              rsvd1[4];
+       __be32          flags_pd;
+       __be64          start_addr;
+       __be64          len;
+       __be32          bsfs_octo_size;
+       u8              rsvd2[16];
+       __be32          xlt_oct_size;
+       u8              rsvd3[3];
+       u8              log2_page_size;
+       u8              rsvd4[4];
+};
+
+struct mlx5_query_special_ctxs_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_query_special_ctxs_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  dump_fill_mkey;
+       __be32                  reserved_lkey;
+};
+
+struct mlx5_create_mkey_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  input_mkey_index;
+       u8                      rsvd0[4];
+       struct mlx5_mkey_seg    seg;
+       u8                      rsvd1[16];
+       __be32                  xlat_oct_act_size;
+       __be32                  bsf_coto_act_size; /* NOTE(review): sic - "octo"? */
+       u8                      rsvd2[168];
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_create_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  mkey;
+       u8                      rsvd[4];
+};
+
+struct mlx5_destroy_mkey_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  mkey;
+       u8                      rsvd[4];
+};
+
+struct mlx5_destroy_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_query_mkey_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  mkey;
+};
+
+struct mlx5_query_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_modify_mkey_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  mkey;
+       __be64                  pas[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_modify_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+};
+
+struct mlx5_dump_mkey_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+};
+
+struct mlx5_dump_mkey_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  mkey;
+};
+
+struct mlx5_mad_ifc_mbox_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be16                  remote_lid;
+       u8                      rsvd0;
+       u8                      port;
+       u8                      rsvd1[4];
+       u8                      data[256];
+};
+
+struct mlx5_mad_ifc_mbox_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+       u8                      data[256];
+};
+
+struct mlx5_access_reg_mbox_in {
+       struct mlx5_inbox_hdr           hdr;
+       u8                              rsvd0[2];
+       __be16                          register_id;
+       __be32                          arg;
+       __be32                          data[]; /* C99 flexible array; variable-length tail */
+};
+
+struct mlx5_access_reg_mbox_out {
+       struct mlx5_outbox_hdr          hdr;
+       u8                              rsvd[8];
+       __be32                          data[]; /* C99 flexible array; variable-length tail */
+};
+
+#define MLX5_ATTR_EXTENDED_PORT_INFO   cpu_to_be16(0xff90)
+
+enum {
+       MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO        = 1 <<  0
+};
+
+#endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
new file mode 100644 (file)
index 0000000..163a818
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_DOORBELL_H
+#define MLX5_DOORBELL_H
+
+#define MLX5_BF_OFFSET       0x800
+#define MLX5_CQ_DOORBELL      0x20
+
+#if BITS_PER_LONG == 64
+/* Assume that we can just write a 64-bit doorbell atomically.  s390
+ * actually doesn't have writeq() but S/390 systems don't even have
+ * PCI so we won't worry about it.
+ */
+
+#define MLX5_DECLARE_DOORBELL_LOCK(name)
+#define MLX5_INIT_DOORBELL_LOCK(ptr)    do { } while (0)
+#define MLX5_GET_DOORBELL_LOCK(ptr)      (NULL)
+
+static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
+                               spinlock_t *doorbell_lock)
+{
+       __raw_writeq(*(u64 *)val, dest); /* one 64-bit store, lock unused */
+}
+
+#else
+
+/* Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */
+
+#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
+#define MLX5_INIT_DOORBELL_LOCK(ptr)     spin_lock_init(ptr)
+#define MLX5_GET_DOORBELL_LOCK(ptr)      (ptr)
+
+static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
+                               spinlock_t *doorbell_lock)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(doorbell_lock, flags); /* keep both halves together */
+       __raw_writel((__force u32) val[0], dest);
+       __raw_writel((__force u32) val[1], dest + 4); /* second word at +4 */
+       spin_unlock_irqrestore(doorbell_lock, flags);
+}
+
+#endif
+
+#endif /* MLX5_DOORBELL_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
new file mode 100644 (file)
index 0000000..e47f1e4
--- /dev/null
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_DRIVER_H
+#define MLX5_DRIVER_H
+
+#include <linux/kernel.h>
+#include <linux/completion.h>
+#include <linux/pci.h>
+#include <linux/spinlock_types.h>
+#include <linux/semaphore.h>
+#include <linux/vmalloc.h>
+#include <linux/radix-tree.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/doorbell.h>
+
+enum {
+       MLX5_BOARD_ID_LEN = 64,
+       MLX5_MAX_NAME_LEN = 16,
+};
+
+enum {
+       /* two hours (7200 seconds) for the sake of bringup. Generally, commands
+        * must always complete and we may need to adjust this timeout value
+        */
+       MLX5_CMD_TIMEOUT_MSEC   = 7200 * 1000,
+       MLX5_CMD_WQ_MAX_NAME    = 32,   /* size of the wq_name buffer in struct mlx5_cmd */
+};
+
+enum {
+       CMD_OWNER_SW            = 0x0,
+       CMD_OWNER_HW            = 0x1,
+       CMD_STATUS_SUCCESS      = 0,
+};
+
+enum mlx5_sqp_t {
+       MLX5_SQP_SMI            = 0,
+       MLX5_SQP_GSI            = 1,
+       MLX5_SQP_IEEE_1588      = 2,
+       MLX5_SQP_SNIFFER        = 3,
+       MLX5_SQP_SYNC_UMR       = 4,
+};
+
+enum {
+       MLX5_MAX_PORTS  = 2,
+};
+
+enum {
+       MLX5_EQ_VEC_PAGES        = 0,
+       MLX5_EQ_VEC_CMD          = 1,
+       MLX5_EQ_VEC_ASYNC        = 2,
+       MLX5_EQ_VEC_COMP_BASE,
+};
+
+enum {
+       MLX5_MAX_EQ_NAME        = 20
+};
+
+enum {
+       MLX5_ATOMIC_MODE_IB_COMP        = 1 << 16,
+       MLX5_ATOMIC_MODE_CX             = 2 << 16,
+       MLX5_ATOMIC_MODE_8B             = 3 << 16,
+       MLX5_ATOMIC_MODE_16B            = 4 << 16,
+       MLX5_ATOMIC_MODE_32B            = 5 << 16,
+       MLX5_ATOMIC_MODE_64B            = 6 << 16,
+       MLX5_ATOMIC_MODE_128B           = 7 << 16,
+       MLX5_ATOMIC_MODE_256B           = 8 << 16,
+};
+
+enum {
+       MLX5_CMD_OP_QUERY_HCA_CAP               = 0x100,
+       MLX5_CMD_OP_QUERY_ADAPTER               = 0x101,
+       MLX5_CMD_OP_INIT_HCA                    = 0x102,
+       MLX5_CMD_OP_TEARDOWN_HCA                = 0x103,
+       MLX5_CMD_OP_QUERY_PAGES                 = 0x107,
+       MLX5_CMD_OP_MANAGE_PAGES                = 0x108,
+       MLX5_CMD_OP_SET_HCA_CAP                 = 0x109,
+
+       MLX5_CMD_OP_CREATE_MKEY                 = 0x200,
+       MLX5_CMD_OP_QUERY_MKEY                  = 0x201,
+       MLX5_CMD_OP_DESTROY_MKEY                = 0x202,
+       MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS      = 0x203,
+
+       MLX5_CMD_OP_CREATE_EQ                   = 0x301,
+       MLX5_CMD_OP_DESTROY_EQ                  = 0x302,
+       MLX5_CMD_OP_QUERY_EQ                    = 0x303,
+
+       MLX5_CMD_OP_CREATE_CQ                   = 0x400,
+       MLX5_CMD_OP_DESTROY_CQ                  = 0x401,
+       MLX5_CMD_OP_QUERY_CQ                    = 0x402,
+       MLX5_CMD_OP_MODIFY_CQ                   = 0x403,
+
+       MLX5_CMD_OP_CREATE_QP                   = 0x500,
+       MLX5_CMD_OP_DESTROY_QP                  = 0x501,
+       MLX5_CMD_OP_RST2INIT_QP                 = 0x502,
+       MLX5_CMD_OP_INIT2RTR_QP                 = 0x503,
+       MLX5_CMD_OP_RTR2RTS_QP                  = 0x504,
+       MLX5_CMD_OP_RTS2RTS_QP                  = 0x505,
+       MLX5_CMD_OP_SQERR2RTS_QP                = 0x506,
+       MLX5_CMD_OP_2ERR_QP                     = 0x507,
+       MLX5_CMD_OP_RTS2SQD_QP                  = 0x508,
+       MLX5_CMD_OP_SQD2RTS_QP                  = 0x509,
+       MLX5_CMD_OP_2RST_QP                     = 0x50a,
+       MLX5_CMD_OP_QUERY_QP                    = 0x50b,
+       MLX5_CMD_OP_CONF_SQP                    = 0x50c,
+       MLX5_CMD_OP_MAD_IFC                     = 0x50d,
+       MLX5_CMD_OP_INIT2INIT_QP                = 0x50e,
+       MLX5_CMD_OP_SUSPEND_QP                  = 0x50f,
+       MLX5_CMD_OP_UNSUSPEND_QP                = 0x510,
+       MLX5_CMD_OP_SQD2SQD_QP                  = 0x511,
+       MLX5_CMD_OP_ALLOC_QP_COUNTER_SET        = 0x512,
+       MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET      = 0x513,
+       MLX5_CMD_OP_QUERY_QP_COUNTER_SET        = 0x514,
+
+       MLX5_CMD_OP_CREATE_PSV                  = 0x600,
+       MLX5_CMD_OP_DESTROY_PSV                 = 0x601,
+       MLX5_CMD_OP_QUERY_PSV                   = 0x602,
+       MLX5_CMD_OP_QUERY_SIG_RULE_TABLE        = 0x603,
+       MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE      = 0x604,
+
+       MLX5_CMD_OP_CREATE_SRQ                  = 0x700,
+       MLX5_CMD_OP_DESTROY_SRQ                 = 0x701,
+       MLX5_CMD_OP_QUERY_SRQ                   = 0x702,
+       MLX5_CMD_OP_ARM_RQ                      = 0x703,
+       MLX5_CMD_OP_RESIZE_SRQ                  = 0x704,
+
+       MLX5_CMD_OP_ALLOC_PD                    = 0x800,
+       MLX5_CMD_OP_DEALLOC_PD                  = 0x801,
+       MLX5_CMD_OP_ALLOC_UAR                   = 0x802,
+       MLX5_CMD_OP_DEALLOC_UAR                 = 0x803,
+
+       MLX5_CMD_OP_ATTACH_TO_MCG               = 0x806,
+       MLX5_CMD_OP_DETACH_FROM_MCG             = 0x807,
+
+
+       MLX5_CMD_OP_ALLOC_XRCD                  = 0x80e,
+       MLX5_CMD_OP_DEALLOC_XRCD                = 0x80f,
+
+       MLX5_CMD_OP_ACCESS_REG                  = 0x805,
+       MLX5_CMD_OP_MAX                         = 0x810,
+};
+
+enum {
+       MLX5_REG_PCAP            = 0x5001,
+       MLX5_REG_PMTU            = 0x5003,
+       MLX5_REG_PTYS            = 0x5004,
+       MLX5_REG_PAOS            = 0x5006,
+       MLX5_REG_PMAOS           = 0x5012,
+       MLX5_REG_PUDE            = 0x5009,
+       MLX5_REG_PMPE            = 0x5010,
+       MLX5_REG_PELC            = 0x500e,
+       MLX5_REG_PMLP            = 0, /* TBD */
+       MLX5_REG_NODE_DESC       = 0x6001,
+       MLX5_REG_HOST_ENDIANNESS = 0x7004,
+};
+
+enum dbg_rsc_type {
+       MLX5_DBG_RSC_QP,
+       MLX5_DBG_RSC_EQ,
+       MLX5_DBG_RSC_CQ,
+};
+
+struct mlx5_field_desc {
+       struct dentry          *dent;
+       int                     i;
+};
+
+struct mlx5_rsc_debug {
+       struct mlx5_core_dev   *dev;            /* device this record belongs to */
+       void                   *object;         /* untyped pointer; interpretation depends on type */
+       enum dbg_rsc_type       type;           /* MLX5_DBG_RSC_QP, _EQ or _CQ */
+       struct dentry          *root;           /* presumably the debugfs directory - TODO confirm */
+       struct mlx5_field_desc  fields[];       /* C99 flexible array member; must stay last */
+};
+
+enum mlx5_dev_event {
+       MLX5_DEV_EVENT_SYS_ERROR,
+       MLX5_DEV_EVENT_PORT_UP,
+       MLX5_DEV_EVENT_PORT_DOWN,
+       MLX5_DEV_EVENT_PORT_INITIALIZED,
+       MLX5_DEV_EVENT_LID_CHANGE,
+       MLX5_DEV_EVENT_PKEY_CHANGE,
+       MLX5_DEV_EVENT_GUID_CHANGE,
+       MLX5_DEV_EVENT_CLIENT_REREG,
+};
+
+struct mlx5_uuar_info {
+       struct mlx5_uar        *uars;
+       int                     num_uars;
+       int                     num_low_latency_uuars;
+       unsigned long          *bitmap;
+       unsigned int           *count;
+       struct mlx5_bf         *bfs;
+
+       /*
+        * protect uuar allocation data structs
+        */
+       struct mutex            lock;
+};
+
+struct mlx5_bf {
+       void __iomem           *reg;
+       void __iomem           *regreg;
+       int                     buf_size;
+       struct mlx5_uar        *uar;
+       unsigned long           offset;
+       int                     need_lock;
+       /* protect blue flame buffer selection when needed
+        */
+       spinlock_t              lock;
+
+       /* serialize 64 bit writes when done as two 32 bit accesses
+        */
+       spinlock_t              lock32;
+       int                     uuarn;
+};
+
+struct mlx5_cmd_first {
+       __be32          data[4];
+};
+
+struct mlx5_cmd_msg {
+       struct list_head                list;
+       struct cache_ent               *cache;
+       u32                             len;
+       struct mlx5_cmd_first           first;
+       struct mlx5_cmd_mailbox        *next;
+};
+
+struct mlx5_cmd_debug {
+       struct dentry          *dbg_root;
+       struct dentry          *dbg_in;
+       struct dentry          *dbg_out;
+       struct dentry          *dbg_outlen;
+       struct dentry          *dbg_status;
+       struct dentry          *dbg_run;
+       void                   *in_msg;
+       void                   *out_msg;
+       u8                      status;
+       u16                     inlen;
+       u16                     outlen;
+};
+
+struct cache_ent {
+       /* protect block chain allocations
+        */
+       spinlock_t              lock;
+       struct list_head        head;
+};
+
+struct cmd_msg_cache {
+       struct cache_ent        large;
+       struct cache_ent        med;
+
+};
+
+struct mlx5_cmd_stats {
+       u64             sum;
+       u64             n;
+       struct dentry  *root;
+       struct dentry  *avg;
+       struct dentry  *count;
+       /* protect command average calculations */
+       spinlock_t      lock;
+};
+
+struct mlx5_cmd {
+       void           *cmd_buf;
+       dma_addr_t      dma;
+       u16             cmdif_rev;
+       u8              log_sz;
+       u8              log_stride;
+       int             max_reg_cmds;
+       int             events;
+       u32 __iomem    *vector;
+
+       /* protect command queue allocations
+        */
+       spinlock_t      alloc_lock;
+
+       /* protect token allocations
+        */
+       spinlock_t      token_lock;
+       u8              token;
+       unsigned long   bitmask;
+       char            wq_name[MLX5_CMD_WQ_MAX_NAME];
+       struct workqueue_struct *wq;
+       struct semaphore sem;
+       struct semaphore pages_sem;
+       int     mode;
+       struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
+       struct pci_pool *pool;
+       struct mlx5_cmd_debug dbg;
+       struct cmd_msg_cache cache;
+       int checksum_disabled;
+       struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
+};
+
+struct mlx5_port_caps {
+       int     gid_table_len;
+       int     pkey_table_len;
+};
+
+struct mlx5_caps {
+       u8      log_max_eq;
+       u8      log_max_cq;
+       u8      log_max_qp;
+       u8      log_max_mkey;
+       u8      log_max_pd;
+       u8      log_max_srq;
+       u32     max_cqes;
+       int     max_wqes;
+       int     max_sq_desc_sz;
+       int     max_rq_desc_sz;
+       u64     flags;
+       u16     stat_rate_support;
+       int     log_max_msg;
+       int     num_ports;
+       int     max_ra_res_qp;
+       int     max_ra_req_qp;
+       int     max_srq_wqes;
+       int     bf_reg_size;
+       int     bf_regs_per_page;
+       struct mlx5_port_caps   port[MLX5_MAX_PORTS];
+       u8                      ext_port_cap[MLX5_MAX_PORTS];
+       int     max_vf;
+       u32     reserved_lkey;
+       u8      local_ca_ack_delay;
+       u8      log_max_mcg;
+       u16     max_qp_mcg;
+       int     min_page_sz;
+};
+
+struct mlx5_cmd_mailbox {
+       void           *buf;
+       dma_addr_t      dma;
+       struct mlx5_cmd_mailbox *next;
+};
+
+struct mlx5_buf_list {
+       void                   *buf;
+       dma_addr_t              map;
+};
+
+struct mlx5_buf {
+       struct mlx5_buf_list    direct;
+       struct mlx5_buf_list   *page_list;
+       int                     nbufs;
+       int                     npages;
+       int                     page_shift;
+       int                     size;
+};
+
+struct mlx5_eq {
+       struct mlx5_core_dev   *dev;
+       __be32 __iomem         *doorbell;
+       u32                     cons_index;
+       struct mlx5_buf         buf;
+       int                     size;
+       u8                      irqn;
+       u8                      eqn;
+       int                     nent;
+       u64                     mask;
+       char                    name[MLX5_MAX_EQ_NAME];
+       struct list_head        list;
+       int                     index;
+       struct mlx5_rsc_debug   *dbg;
+};
+
+
+struct mlx5_core_mr {
+       u64                     iova;
+       u64                     size;
+       u32                     key;
+       u32                     pd;
+       u32                     access;
+};
+
+struct mlx5_core_srq {
+       u32             srqn;
+       int             max;
+       int             max_gs;
+       int             max_avail_gather;
+       int             wqe_shift;
+       void (*event)   (struct mlx5_core_srq *, enum mlx5_event);
+
+       atomic_t                refcount;
+       struct completion       free;
+};
+
+struct mlx5_eq_table {
+       void __iomem           *update_ci;
+       void __iomem           *update_arm_ci;
+       struct list_head       *comp_eq_head;
+       struct mlx5_eq          pages_eq;
+       struct mlx5_eq          async_eq;
+       struct mlx5_eq          cmd_eq;
+       struct msix_entry       *msix_arr;
+       int                     num_comp_vectors;
+       /* protect EQs list
+        */
+       spinlock_t              lock;
+};
+
+struct mlx5_uar {
+       u32                     index;
+       struct list_head        bf_list;
+       unsigned                free_bf_bmap;
+       void __iomem           *wc_map;
+       void __iomem           *map;
+};
+
+
+struct mlx5_core_health {
+       struct health_buffer __iomem   *health;
+       __be32 __iomem                 *health_counter;
+       struct timer_list               timer;
+       struct list_head                list;
+       u32                             prev;
+       int                             miss_counter;
+};
+
+struct mlx5_cq_table {
+       /* protect radix tree
+        */
+       spinlock_t              lock;
+       struct radix_tree_root  tree;
+};
+
+struct mlx5_qp_table {
+       /* protect radix tree
+        */
+       spinlock_t              lock;
+       struct radix_tree_root  tree;
+};
+
+struct mlx5_srq_table {
+       /* protect radix tree
+        */
+       spinlock_t              lock;
+       struct radix_tree_root  tree;
+};
+
+struct mlx5_priv {
+       char                    name[MLX5_MAX_NAME_LEN];
+       struct mlx5_eq_table    eq_table;
+       struct mlx5_uuar_info   uuari;
+       MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
+
+       /* pages stuff */
+       struct workqueue_struct *pg_wq;
+       struct rb_root          page_root;
+       int                     fw_pages;
+       int                     reg_pages;
+
+       struct mlx5_core_health health;
+
+       struct mlx5_srq_table   srq_table;
+
+       /* start: qp stuff */
+       struct mlx5_qp_table    qp_table;
+       struct dentry          *qp_debugfs;
+       struct dentry          *eq_debugfs;
+       struct dentry          *cq_debugfs;
+       struct dentry          *cmdif_debugfs;
+       /* end: qp stuff */
+
+       /* start: cq stuff */
+       struct mlx5_cq_table    cq_table;
+       /* end: cq stuff */
+
+       /* start: alloc stuff */
+       struct mutex            pgdir_mutex;
+       struct list_head        pgdir_list;
+       /* end: alloc stuff */
+       struct dentry          *dbg_root;
+
+       /* protect mkey key part */
+       spinlock_t              mkey_lock;
+       u8                      mkey_key;
+};
+
+struct mlx5_core_dev {
+       struct pci_dev         *pdev;
+       u8                      rev_id;
+       char                    board_id[MLX5_BOARD_ID_LEN];
+       struct mlx5_cmd         cmd;
+       struct mlx5_caps        caps;
+       phys_addr_t             iseg_base;
+       struct mlx5_init_seg __iomem *iseg;
+       void                    (*event) (struct mlx5_core_dev *dev,
+                                         enum mlx5_dev_event event,
+                                         void *data);
+       struct mlx5_priv        priv;
+       struct mlx5_profile     *profile;
+       atomic_t                num_qps;
+};
+
+struct mlx5_db {
+       __be32                  *db;
+       union {
+               struct mlx5_db_pgdir            *pgdir;
+               struct mlx5_ib_user_db_page     *user_page;
+       }                       u;
+       dma_addr_t              dma;
+       int                     index;
+};
+
+enum {
+       MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES,
+};
+
+enum {
+       MLX5_COMP_EQ_SIZE = 1024,
+};
+
+struct mlx5_db_pgdir {
+       struct list_head        list;
+       DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE);
+       __be32                 *db_page;
+       dma_addr_t              db_dma;
+};
+
+typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
+
+struct mlx5_cmd_work_ent {
+       struct mlx5_cmd_msg    *in;
+       struct mlx5_cmd_msg    *out;
+       mlx5_cmd_cbk_t          callback;
+       void                   *context;
+       int idx;
+       struct completion       done;
+       struct mlx5_cmd        *cmd;
+       struct work_struct      work;
+       struct mlx5_cmd_layout *lay;
+       int                     ret;
+       int                     page_queue;
+       u8                      status;
+       u8                      token;
+       struct timespec         ts1;
+       struct timespec         ts2;
+};
+
+struct mlx5_pas {
+       u64     pa;
+       u8      log_sz;
+};
+
+static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+{
+       /* direct mapping on 64-bit or when nbufs == 1, else per-page lookup */
+       return likely(BITS_PER_LONG == 64 || buf->nbufs == 1) ?
+               buf->direct.buf + offset :
+               buf->page_list[offset >> PAGE_SHIFT].buf +
+                       (offset & (PAGE_SIZE - 1));
+}
+
+extern struct workqueue_struct *mlx5_core_wq;
+
+#define STRUCT_FIELD(header, field) \
+       .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field),      \
+       .struct_size_bytes   = sizeof((struct ib_unpacked_ ## header *)0)->field
+
+struct ib_field {
+       size_t struct_offset_bytes;
+       size_t struct_size_bytes;
+       int    offset_bits;
+       int    size_bits;
+};
+
+static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
+{
+       return pci_get_drvdata(pdev);   /* the PCI drvdata pointer is handed back as the core device */
+}
+
+extern struct dentry *mlx5_debugfs_root;
+
+static inline u16 fw_rev_maj(struct mlx5_core_dev *dev)
+{
+       return (u16)ioread32be(&dev->iseg->fw_rev);     /* low 16 bits of fw_rev */
+}
+
+static inline u16 fw_rev_min(struct mlx5_core_dev *dev)
+{
+       return (u16)(ioread32be(&dev->iseg->fw_rev) >> 16);     /* bits 16 and up of fw_rev */
+}
+
+static inline u16 fw_rev_sub(struct mlx5_core_dev *dev)
+{
+       return (u16)ioread32be(&dev->iseg->cmdif_rev_fw_sub);   /* low 16 bits of the register */
+}
+
+static inline u16 cmdif_rev(struct mlx5_core_dev *dev)
+{
+       return (u16)(ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16);   /* bits 16 and up of the register */
+}
+
+static inline void *mlx5_vzalloc(unsigned long size)
+{
+       void *mem = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+
+       /* kzalloc was asked not to warn on failure; fall back to vzalloc */
+       if (mem)
+               return mem;
+       return vzalloc(size);
+}
+
+static inline void mlx5_vfree(const void *addr)
+{
+       if (!addr || !is_vmalloc_addr(addr))
+               kfree(addr);    /* NULL also takes this branch */
+       else
+               vfree(addr);    /* non-NULL address that is_vmalloc_addr() accepts */
+}
+
+int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev);
+void mlx5_dev_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cmd_init(struct mlx5_core_dev *dev);
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
+int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+                 int out_size);
+int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
+int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+void mlx5_health_cleanup(void);
+void  __init mlx5_health_init(void);
+void mlx5_start_health_poll(struct mlx5_core_dev *dev);
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
+                  struct mlx5_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
+                                                     gfp_t flags, int npages);
+void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
+                                struct mlx5_cmd_mailbox *head);
+int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                        struct mlx5_create_srq_mbox_in *in, int inlen);
+int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_query_srq_mbox_out *out);
+int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+                     u16 lwm, int is_srq);
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                         struct mlx5_create_mkey_mbox_in *in, int inlen);
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                        struct mlx5_query_mkey_mbox_out *out, int outlen);
+int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+                            u32 *mkey);
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
+                     u16 opmod, int port);
+void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
+int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
+void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
+                                s16 npages);
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev);
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
+void mlx5_register_debugfs(void);
+void mlx5_unregister_debugfs(void);
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
+void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type);
+void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
+struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
+void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+                      int nent, u64 mask, const char *name, struct mlx5_uar *uar);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
+
+int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+                        int size_in, void *data_out, int size_out,
+                        u16 reg_num, int arg, int write);
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                      struct mlx5_query_eq_mbox_out *out, int outlen);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
+
+typedef void (*health_handler_t)(struct pci_dev *pdev, void *buf, int size);
+int mlx5_register_health_report_handler(health_handler_t handler);
+void mlx5_unregister_health_report_handler(void);
+const char *mlx5_command_str(int command);
+int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+static inline u32 mlx5_mkey_to_idx(u32 mkey)
+{
+       return mkey >> 8;       /* drop the low 8 bits; the remaining upper bits are the index */
+}
+
+static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
+{
+       return mkey_idx << 8;   /* index goes above the low 8 bits, which are left zero */
+}
+
+enum {
+       MLX5_PROF_MASK_QP_SIZE          = (u64)1 << 0,
+       MLX5_PROF_MASK_CMDIF_CSUM       = (u64)1 << 1,
+       MLX5_PROF_MASK_MR_CACHE         = (u64)1 << 2,
+};
+
+enum {
+       MAX_MR_CACHE_ENTRIES    = 16,
+};
+
+struct mlx5_profile {
+       u64     mask;
+       u32     log_max_qp;
+       int     cmdif_csum;
+       struct {
+               int     size;
+               int     limit;
+       } mr_cache[MAX_MR_CACHE_ENTRIES];
+};
+
+#endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
new file mode 100644 (file)
index 0000000..d9e3eac
--- /dev/null
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_QP_H
+#define MLX5_QP_H
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
+
+#define MLX5_INVALID_LKEY      0x100
+
+enum mlx5_qp_optpar {  /* single-bit flags; bits 11, 15 and 17 are not assigned here */
+       MLX5_QP_OPTPAR_ALT_ADDR_PATH            = 1 << 0,
+       MLX5_QP_OPTPAR_RRE                      = 1 << 1,
+       MLX5_QP_OPTPAR_RAE                      = 1 << 2,
+       MLX5_QP_OPTPAR_RWE                      = 1 << 3,
+       MLX5_QP_OPTPAR_PKEY_INDEX               = 1 << 4,
+       MLX5_QP_OPTPAR_Q_KEY                    = 1 << 5,
+       MLX5_QP_OPTPAR_RNR_TIMEOUT              = 1 << 6,
+       MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH        = 1 << 7,
+       MLX5_QP_OPTPAR_SRA_MAX                  = 1 << 8,
+       MLX5_QP_OPTPAR_RRA_MAX                  = 1 << 9,
+       MLX5_QP_OPTPAR_PM_STATE                 = 1 << 10,
+       MLX5_QP_OPTPAR_RETRY_COUNT              = 1 << 12,      /* bit 11 skipped */
+       MLX5_QP_OPTPAR_RNR_RETRY                = 1 << 13,
+       MLX5_QP_OPTPAR_ACK_TIMEOUT              = 1 << 14,
+       MLX5_QP_OPTPAR_PRI_PORT                 = 1 << 16,      /* bit 15 skipped */
+       MLX5_QP_OPTPAR_SRQN                     = 1 << 18,      /* bit 17 skipped */
+       MLX5_QP_OPTPAR_CQN_RCV                  = 1 << 19,
+       MLX5_QP_OPTPAR_DC_HS                    = 1 << 20,
+       MLX5_QP_OPTPAR_DC_KEY                   = 1 << 21,
+};
+
+enum mlx5_qp_state {   /* state codes 0..7 and 9; code 8 is not assigned */
+       MLX5_QP_STATE_RST                       = 0,
+       MLX5_QP_STATE_INIT                      = 1,
+       MLX5_QP_STATE_RTR                       = 2,
+       MLX5_QP_STATE_RTS                       = 3,
+       MLX5_QP_STATE_SQER                      = 4,
+       MLX5_QP_STATE_SQD                       = 5,
+       MLX5_QP_STATE_ERR                       = 6,
+       MLX5_QP_STATE_SQ_DRAINING               = 7,
+       MLX5_QP_STATE_SUSPENDED                 = 9,
+       MLX5_QP_NUM_STATE       /* implicit value 10 (SUSPENDED + 1): larger than every state code above */
+};
+
+enum { /* QP type codes (the _ST_ values); kept in ascending order so MLX5_QP_ST_MAX exceeds all of them */
+       MLX5_QP_ST_RC                           = 0x0,
+       MLX5_QP_ST_UC                           = 0x1,
+       MLX5_QP_ST_UD                           = 0x2,
+       MLX5_QP_ST_XRC                          = 0x3,
+       MLX5_QP_ST_MLX                          = 0x4,
+       MLX5_QP_ST_DCI                          = 0x5,
+       MLX5_QP_ST_DCT                          = 0x6,
+       MLX5_QP_ST_QP0                          = 0x7,
+       MLX5_QP_ST_QP1                          = 0x8,
+       MLX5_QP_ST_RAW_ETHERTYPE                = 0x9,
+       MLX5_QP_ST_RAW_IPV6                     = 0xa,
+       MLX5_QP_ST_SNIFFER                      = 0xb,
+       MLX5_QP_ST_REG_UMR                      = 0xc,
+       MLX5_QP_ST_PTP_1588                     = 0xd,
+       MLX5_QP_ST_SYNC_UMR                     = 0xe,  /* highest code: must stay last so the implicit MAX below is 0xf */
+       MLX5_QP_ST_MAX  /* implicit: previous enumerator + 1 = 0xf, usable as an array bound over all codes */
+};
+
+enum { /* codes 0x0, 0x1 and 0x3, listed out of numeric order; 0x2 is not assigned */
+       MLX5_QP_PM_MIGRATED                     = 0x3,
+       MLX5_QP_PM_ARMED                        = 0x0,
+       MLX5_QP_PM_REARM                        = 0x1
+};
+
+enum { /* four codes (0..3) pre-shifted into bits 25:24 of a 32-bit word */
+       MLX5_NON_ZERO_RQ        = 0 << 24,      /* evaluates to 0, so it cannot be tested with a bitwise AND */
+       MLX5_SRQ_RQ             = 1 << 24,
+       MLX5_CRQ_RQ             = 2 << 24,
+       MLX5_ZERO_LEN_RQ        = 3 << 24       /* NOTE(review): presumably OR-ed into mlx5_qp_context.rq_type_srqn - confirm */
+};
+
+enum { /* two groups that reuse bits 15..13: same-numbered enumerators are numerically equal */
+       /* params1 */
+       MLX5_QP_BIT_SRE                         = 1 << 15,
+       MLX5_QP_BIT_SWE                         = 1 << 14,
+       MLX5_QP_BIT_SAE                         = 1 << 13,
+       /* params2 */
+       MLX5_QP_BIT_RRE                         = 1 << 15,      /* same value as MLX5_QP_BIT_SRE; the compiler cannot catch a mix-up */
+       MLX5_QP_BIT_RWE                         = 1 << 14,
+       MLX5_QP_BIT_RAE                         = 1 << 13,
+       MLX5_QP_BIT_RIC                         = 1 <<  4,      /* NOTE(review): groups presumably map to mlx5_qp_context.params1/params2 - confirm */
+};
+
+enum { /* NOTE(review): presumably OR-ed into mlx5_wqe_ctrl_seg.fm_ce_se - confirm */
+       MLX5_WQE_CTRL_CQ_UPDATE         = 2 << 2,       /* value 8, i.e. bit 3 */
+       MLX5_WQE_CTRL_SOLICITED         = 1 << 1,       /* value 2, i.e. bit 1 */
+};
+
+enum {
+       MLX5_SEND_WQE_BB        = 64,   /* NOTE(review): presumably the send WQE building-block size in bytes - confirm */
+};
+
+enum { /* single-bit flags occupying bits 27..31 of a 32-bit word */
+       MLX5_WQE_FMR_PERM_LOCAL_READ    = 1 << 27,
+       MLX5_WQE_FMR_PERM_LOCAL_WRITE   = 1 << 28,
+       MLX5_WQE_FMR_PERM_REMOTE_READ   = 1 << 29,
+       MLX5_WQE_FMR_PERM_REMOTE_WRITE  = 1 << 30,
+       MLX5_WQE_FMR_PERM_ATOMIC        = 1 << 31       /* NOTE(review): shifts into the sign bit of a 32-bit int (undefined in ISO C); relies on the compiler yielding INT_MIN */
+};
+
+enum { /* codes 0, 1, 3 and 4 pre-shifted into bits 7:5; code 2 is not assigned */
+       MLX5_FENCE_MODE_NONE                    = 0 << 5,       /* evaluates to 0 */
+       MLX5_FENCE_MODE_INITIATOR_SMALL         = 1 << 5,
+       MLX5_FENCE_MODE_STRONG_ORDERING         = 3 << 5,
+       MLX5_FENCE_MODE_SMALL_AND_FENCE         = 4 << 5,       /* value 0x80; NOTE(review): presumably OR-ed into the u8 mlx5_wqe_ctrl_seg.fm_ce_se - confirm */
+};
+
+enum { /* two single-bit flags: bit 28 and bit 31 */
+       MLX5_QP_LAT_SENSITIVE   = 1 << 28,
+       MLX5_QP_ENABLE_SIG      = 1 << 31,      /* NOTE(review): shifts into the sign bit of a 32-bit int (undefined in ISO C); relies on the compiler yielding INT_MIN */
+};
+
+enum { /* NOTE(review): presumably indices of the receive and send words of a doorbell record - confirm */
+       MLX5_RCV_DBR    = 0,
+       MLX5_SND_DBR    = 1,
+};
+
+struct mlx5_wqe_fmr_seg {      /* 48 bytes as declared; multi-byte members are big-endian (__be32/__be64) */
+       __be32                  flags;  /* NOTE(review): presumably carries the MLX5_WQE_FMR_PERM_* bits - confirm */
+       __be32                  mem_key;
+       __be64                  buf_list;
+       __be64                  start_addr;
+       __be64                  reg_len;
+       __be32                  offset;
+       __be32                  page_size;
+       u32                     reserved[2];    /* 8 bytes of padding, declared as native u32 rather than __be32 */
+};
+
+struct mlx5_wqe_ctrl_seg {     /* 16 bytes as declared (4 + 4 + 1 + 2 + 1 + 4) */
+       __be32                  opmod_idx_opcode;       /* NOTE(review): by its name packs opmod, WQE index and opcode into one word - confirm the bit layout */
+       __be32                  qpn_ds; /* NOTE(review): by its name packs the QP number and a size count - confirm */
+       u8                      signature;
+       u8                      rsvd[2];
+       u8                      fm_ce_se;       /* NOTE(review): presumably holds MLX5_FENCE_MODE_* and MLX5_WQE_CTRL_* bits - confirm */
+       __be32                  imm;
+};
+
+struct mlx5_wqe_xrc_seg {      /* 16 bytes as declared: one big-endian word plus 12 reserved bytes */
+       __be32                  xrc_srqn;
+       u8                      rsvd[12];
+};
+
+struct mlx5_wqe_masked_atomic_seg {    /* 32 bytes as declared: the two operands of mlx5_wqe_atomic_seg plus a mask for each */
+       __be64                  swap_add;
+       __be64                  compare;
+       __be64                  swap_add_mask;
+       __be64                  compare_mask;
+};
+
+struct mlx5_av {       /* 48 bytes as declared; every member falls on its natural alignment */
+       union { /* the first 8 bytes hold either a 32-bit qkey plus 4 reserved bytes, or a 64-bit dc_key */
+               struct {
+                       __be32  qkey;
+                       __be32  reserved;
+               } qkey;
+               __be64  dc_key;
+       } key;
+       __be32  dqp_dct;
+       u8      stat_rate_sl;   /* NOTE(review): by its name packs static rate and SL into one byte - confirm */
+       u8      fl_mlid;
+       __be16  rlid;
+       u8      reserved0[10];
+       u8      tclass;
+       u8      hop_limit;
+       __be32  grh_gid_fl;
+       u8      rgid[16];       /* 16 raw bytes */
+};
+
+struct mlx5_wqe_datagram_seg { /* wrapper whose only member is a struct mlx5_av */
+       struct mlx5_av  av;
+};
+
+struct mlx5_wqe_raddr_seg {    /* 16 bytes as declared */
+       __be64                  raddr;
+       __be32                  rkey;
+       u32                     reserved;       /* declared as native u32 rather than __be32 */
+};
+
+struct mlx5_wqe_atomic_seg {   /* 16 bytes as declared: two big-endian 64-bit operands */
+       __be64                  swap_add;
+       __be64                  compare;
+};
+
+struct mlx5_wqe_data_seg {     /* 16 bytes as declared: length, key, then 64-bit address, all big-endian */
+       __be32                  byte_count;
+       __be32                  lkey;
+       __be64                  addr;
+};
+
+struct mlx5_wqe_umr_ctrl_seg { /* 48 bytes as declared (1 + 3 + 2 + 2 + 8 + 32) */
+       u8              flags;
+       u8              rsvd0[3];
+       __be16          klm_octowords;  /* NOTE(review): by its name a count in 16-byte units - confirm */
+       __be16          bsf_octowords;
+       __be64          mkey_mask;
+       u8              rsvd1[32];
+};
+
+struct mlx5_seg_set_psv {      /* 16 bytes as declared (4 + 2 + 2 + 4 + 4); all members big-endian */
+       __be32          psv_num;
+       __be16          syndrome;
+       __be16          status;
+       __be32          transient_sig;
+       __be32          ref_tag;
+};
+
+struct mlx5_seg_get_psv {      /* 48 bytes as declared; its last 29 bytes (num_psv onward) match the tail of mlx5_seg_check_psv */
+       u8              rsvd[19];
+       u8              num_psv;        /* NOTE(review): presumably the number of psv_index[] entries in use - confirm */
+       __be32          l_key;
+       __be64          va;
+       __be32          psv_index[4];
+};
+
+struct mlx5_seg_check_psv {    /* 48 bytes as declared; the first 12 bytes are three (2 reserved + __be16) pairs */
+       u8              rsvd0[2];
+       __be16          err_coalescing_op;
+       u8              rsvd1[2];
+       __be16          xport_err_op;
+       u8              rsvd2[2];
+       __be16          xport_err_mask;
+       u8              rsvd3[7];
+       u8              num_psv;        /* lands at byte offset 19, same offset as in mlx5_seg_get_psv */
+       __be32          l_key;
+       __be64          va;
+       __be32          psv_index[4];
+};
+
+struct mlx5_rwqe_sig { /* 16 bytes as declared; member-for-member identical to struct mlx5_wqe_signature_seg */
+       u8      rsvd0[4];
+       u8      signature;      /* single byte at offset 4 */
+       u8      rsvd1[11];
+};
+
+struct mlx5_wqe_signature_seg {        /* 16 bytes as declared; member-for-member identical to struct mlx5_rwqe_sig */
+       u8      rsvd0[4];
+       u8      signature;      /* single byte at offset 4 */
+       u8      rsvd1[11];
+};
+
+struct mlx5_wqe_inline_seg {   /* 4-byte header only; NOTE(review): the inline payload presumably follows it in the WQE - confirm */
+       __be32  byte_count;
+};
+
+struct mlx5_core_qp {  /* software-side QP object: uses native types, not big-endian wire fields */
+       void (*event)           (struct mlx5_core_qp *, int);  /* callback that receives this QP and an int; NOTE(review): presumably an event type code - confirm */
+       int                     qpn;    /* declared int here, while __mlx5_qp_lookup() takes the QP number as u32 */
+       atomic_t                refcount;
+       struct completion       free;   /* NOTE(review): presumably completed when refcount drops to zero so teardown can wait - confirm */
+       struct mlx5_rsc_debug   *dbg;
+       int                     pid;
+};
+
+struct mlx5_qp_path {  /* 44 bytes as declared; embedded twice in struct mlx5_qp_context */
+       u8                      fl;
+       u8                      rsvd3;
+       u8                      free_ar;
+       u8                      pkey_index;
+       u8                      rsvd0;
+       u8                      grh_mlid;
+       __be16                  rlid;   /* at byte offset 6, so naturally aligned */
+       u8                      ackto_lt;
+       u8                      mgid_index;
+       u8                      static_rate;
+       u8                      hop_limit;
+       __be32                  tclass_flowlabel;       /* NOTE(review): by its name packs traffic class and flow label into one word - confirm */
+       u8                      rgid[16];       /* 16 raw bytes */
+       u8                      rsvd1[4];
+       u8                      sl;
+       u8                      port;
+       u8                      rsvd2[6];
+};
+
+struct mlx5_qp_context {       /* 232 bytes as declared; embedded in the create/modify/query QP mailbox structs */
+       __be32                  flags;
+       __be32                  flags_pd;
+       u8                      mtu_msgmax;
+       u8                      rq_size_stride;
+       __be16                  sq_crq_size;
+       __be32                  qp_counter_set_usr_page;
+       __be32                  wire_qpn;
+       __be32                  log_pg_sz_remote_qpn;
+       struct                  mlx5_qp_path pri_path;  /* two consecutive 44-byte path records: pri_path then alt_path */
+       struct                  mlx5_qp_path alt_path;
+       __be32                  params1;        /* NOTE(review): presumably holds the "params1" MLX5_QP_BIT_* flags - confirm */
+       u8                      reserved2[4];
+       __be32                  next_send_psn;
+       __be32                  cqn_send;
+       u8                      reserved3[8];
+       __be32                  last_acked_psn;
+       __be32                  ssn;
+       __be32                  params2;        /* NOTE(review): presumably holds the "params2" MLX5_QP_BIT_* flags - confirm */
+       __be32                  rnr_nextrecvpsn;
+       __be32                  xrcd;
+       __be32                  cqn_recv;
+       __be64                  db_rec_addr;    /* at byte offset 160, so 8-byte aligned within the struct */
+       __be32                  qkey;
+       __be32                  rq_type_srqn;   /* NOTE(review): presumably an MLX5_*_RQ code (bits 25:24) combined with an SRQ number - confirm */
+       __be32                  rmsn;
+       __be16                  hw_sq_wqe_counter;
+       __be16                  sw_sq_wqe_counter;
+       __be16                  hw_rcyclic_byte_counter;
+       __be16                  hw_rq_counter;
+       __be16                  sw_rcyclic_byte_counter;
+       __be16                  sw_rq_counter;
+       u8                      rsvd0[5];
+       u8                      cgs;
+       u8                      cs_req;
+       u8                      cs_res;
+       __be64                  dc_access_key;  /* at byte offset 200, so 8-byte aligned within the struct */
+       u8                      rsvd1[24];
+};
+
+struct mlx5_create_qp_mbox_in {        /* variable-length input layout: fixed part below, then a caller-sized pas[] tail */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  input_qpn;
+       u8                      rsvd0[4];
+       __be32                  opt_param_mask; /* NOTE(review): presumably an OR of MLX5_QP_OPTPAR_* flags - confirm */
+       u8                      rsvd1[4];
+       struct mlx5_qp_context  ctx;
+       u8                      rsvd3[16];
+       __be64                  pas[];  /* C99 flexible array member (was the GNU zero-length pas[0]); same layout, adds nothing to sizeof */
+};
+
+struct mlx5_create_qp_mbox_out {       /* output layout: header followed by 8 bytes */
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  qpn;    /* big-endian; NOTE(review): presumably the QP number assigned by the device - confirm */
+       u8                      rsvd0[4];
+};
+
+struct mlx5_destroy_qp_mbox_in {       /* input layout: header, big-endian qpn, 4 reserved bytes */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       u8                      rsvd0[4];
+};
+
+struct mlx5_destroy_qp_mbox_out {      /* output layout: header plus 8 reserved bytes, no payload members */
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+};
+
+struct mlx5_modify_qp_mbox_in {        /* fixed-size input layout: header, qpn, optparam, then a full QP context */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       u8                      rsvd1[4];       /* note: rsvd1 is declared before rsvd0 in this struct */
+       __be32                  optparam;       /* NOTE(review): presumably an OR of MLX5_QP_OPTPAR_* flags - confirm */
+       u8                      rsvd0[4];
+       struct mlx5_qp_context  ctx;
+};
+
+struct mlx5_modify_qp_mbox_out {       /* output layout: header plus 8 reserved bytes, no payload members */
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd0[8];
+};
+
+struct mlx5_query_qp_mbox_in { /* input layout: header, big-endian qpn, 4 reserved bytes */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       u8                      rsvd[4];
+};
+
+struct mlx5_query_qp_mbox_out {        /* variable-length output layout: fixed part below, then a caller-sized pas[] tail */
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd1[8];
+       __be32                  optparam;
+       u8                      rsvd0[4];
+       struct mlx5_qp_context  ctx;
+       u8                      rsvd2[16];
+       __be64                  pas[];  /* C99 flexible array member (was the GNU zero-length pas[0]); same layout, adds nothing to sizeof */
+};
+
+struct mlx5_conf_sqp_mbox_in { /* input layout: header, big-endian qpn, 3 reserved bytes, 1-byte type */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  qpn;
+       u8                      rsvd[3];
+       u8                      type;   /* last byte of the 4-byte word that follows qpn */
+};
+
+struct mlx5_conf_sqp_mbox_out {        /* output layout: header plus 8 reserved bytes, no payload members */
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_alloc_xrcd_mbox_in {       /* input layout: header plus 8 reserved bytes, no parameters */
+       struct mlx5_inbox_hdr   hdr;
+       u8                      rsvd[8];
+};
+
+struct mlx5_alloc_xrcd_mbox_out {      /* output layout: header, big-endian xrcdn, 4 reserved bytes */
+       struct mlx5_outbox_hdr  hdr;
+       __be32                  xrcdn;  /* NOTE(review): presumably the value mlx5_core_xrcd_alloc() hands back through its u32 *xrcdn - confirm */
+       u8                      rsvd[4];
+};
+
+struct mlx5_dealloc_xrcd_mbox_in {     /* input layout: header, big-endian xrcdn, 4 reserved bytes */
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  xrcdn;
+       u8                      rsvd[4];
+};
+
+struct mlx5_dealloc_xrcd_mbox_out {    /* output layout: header plus 8 reserved bytes, no payload members */
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
+static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn)        /* look up qpn in the device's QP radix tree */
+{
+       return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);        /* returns the lookup result unchanged; takes no lock and no reference here - NOTE(review): callers presumably provide the locking, confirm */
+}
+
+int mlx5_core_create_qp(struct mlx5_core_dev *dev,     /* prototypes only; the definitions are not in this header */
+                       struct mlx5_core_qp *qp,
+                       struct mlx5_create_qp_mbox_in *in,
+                       int inlen);     /* length is passed separately because mlx5_create_qp_mbox_in ends in the variable-length pas array */
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,       /* takes both the current and the requested state */
+                       enum mlx5_qp_state new_state,
+                       struct mlx5_modify_qp_mbox_in *in, int sqd_event,       /* no length argument: mlx5_modify_qp_mbox_in has a fixed size */
+                       struct mlx5_core_qp *qp);
+int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
+                        struct mlx5_core_qp *qp);
+int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+                      struct mlx5_query_qp_mbox_out *out, int outlen);  /* length is passed separately because mlx5_query_qp_mbox_out ends in the variable-length pas array */
+
+int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);       /* result is returned through the xrcdn pointer */
+int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);      /* takes the xrcdn by value */
+void mlx5_init_qp_table(struct mlx5_core_dev *dev);    /* returns void, so it cannot report failure to the caller */
+void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);     /* returns int; the remove below returns void */
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+
+#endif /* MLX5_QP_H */
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
new file mode 100644 (file)
index 0000000..e1a363a
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_SRQ_H
+#define MLX5_SRQ_H
+
+#include <linux/mlx5/driver.h>
+
+void mlx5_init_srq_table(struct mlx5_core_dev *dev);   /* returns void, so it cannot report failure to the caller */
+void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);        /* same signature shape as the init above */
+
+#endif /* MLX5_SRQ_H */