Merge branches 'cma', 'cxgb4', 'flowsteer', 'ipoib', 'misc', 'mlx4', 'mlx5', 'nes...
author Roland Dreier <roland@purestorage.com>
Sun, 17 Nov 2013 16:22:19 +0000 (08:22 -0800)
committer Roland Dreier <roland@purestorage.com>
Sun, 17 Nov 2013 16:22:19 +0000 (08:22 -0800)
46 files changed:
drivers/infiniband/Kconfig
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/netlink.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/ipath/ipath_user_sdma.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_mad.h
drivers/infiniband/hw/qib/qib_user_sdma.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_netlink.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/rdma/ib_verbs.h
include/uapi/rdma/ib_user_verbs.h

index b84791f..5ceda71 100644
@@ -31,17 +31,6 @@ config INFINIBAND_USER_ACCESS
          libibverbs, libibcm and a hardware driver library from
          <http://www.openfabrics.org/git/>.
 
-config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-       bool "Experimental and unstable ABI for userspace access to flow steering verbs"
-       depends on INFINIBAND_USER_ACCESS
-       depends on STAGING
-       ---help---
-         The final ABI for userspace access to flow steering verbs
-         has not been defined.  To use the current ABI, *WHICH WILL
-         CHANGE IN THE FUTURE*, say Y here.
-
-         If unsure, say N.
-
 config INFINIBAND_USER_MEM
        bool
        depends on INFINIBAND_USER_ACCESS != n
index 784b97c..f2ef7ef 100644
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
        unsigned long flags;
        int id;
-       static int next_id;
 
        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);
 
-       id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
-       if (id >= 0)
-               next_id = max(id + 1, 0);
+       id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
 
        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();
index dab4b41..830c983 100644
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
        return ret;
 }
 
-static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
-{
-       int i;
-       int err;
-       struct ib_port_attr props;
-       union ib_gid tmp;
-
-       err = ib_query_port(device, port_num, &props);
-       if (err)
-               return err;
-
-       for (i = 0; i < props.gid_tbl_len; ++i) {
-               err = ib_query_gid(device, port_num, i, &tmp);
-               if (err)
-                       return err;
-               if (!memcmp(&tmp, gid, sizeof tmp))
-                       return 0;
-       }
-
-       return -EADDRNOTAVAIL;
-}
-
 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
 {
        dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
        return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv,
+                          struct rdma_id_private *listen_id_priv)
 {
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        struct cma_device *cma_dev;
        union ib_gid gid, iboe_gid;
        int ret = -ENODEV;
-       u8 port;
+       u8 port, found_port;
        enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
                IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 
@@ -389,17 +368,39 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
        iboe_addr_get_sgid(dev_addr, &iboe_gid);
        memcpy(&gid, dev_addr->src_dev_addr +
               rdma_addr_gid_offset(dev_addr), sizeof gid);
+       if (listen_id_priv &&
+           rdma_port_get_link_layer(listen_id_priv->id.device,
+                                    listen_id_priv->id.port_num) == dev_ll) {
+               cma_dev = listen_id_priv->cma_dev;
+               port = listen_id_priv->id.port_num;
+               if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+                   rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+                       ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+                                                &found_port, NULL);
+               else
+                       ret = ib_find_cached_gid(cma_dev->device, &gid,
+                                                &found_port, NULL);
+
+               if (!ret && (port  == found_port)) {
+                       id_priv->id.port_num = found_port;
+                       goto out;
+               }
+       }
        list_for_each_entry(cma_dev, &dev_list, list) {
                for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+                       if (listen_id_priv &&
+                           listen_id_priv->cma_dev == cma_dev &&
+                           listen_id_priv->id.port_num == port)
+                               continue;
                        if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
                                if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
                                    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
-                                       ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+                                       ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
                                else
-                                       ret = find_gid_port(cma_dev->device, &gid, port);
+                                       ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
 
-                               if (!ret) {
-                                       id_priv->id.port_num = port;
+                               if (!ret && (port == found_port)) {
+                                       id_priv->id.port_num = found_port;
                                        goto out;
                                }
                        }
@@ -1292,7 +1293,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        }
 
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-       ret = cma_acquire_dev(conn_id);
+       ret = cma_acquire_dev(conn_id, listen_id);
        if (ret)
                goto err2;
 
@@ -1451,7 +1452,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 {
        struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
-       struct net_device *dev = NULL;
        struct rdma_cm_event event;
        int ret;
        struct ib_device_attr attr;
@@ -1481,7 +1481,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                goto out;
        }
 
-       ret = cma_acquire_dev(conn_id);
+       ret = cma_acquire_dev(conn_id, listen_id);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
@@ -1529,8 +1529,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        cma_deref_id(conn_id);
 
 out:
-       if (dev)
-               dev_put(dev);
        mutex_unlock(&listen_id->handler_mutex);
        return ret;
 }
@@ -2050,7 +2048,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
                goto out;
 
        if (!status && !id_priv->cma_dev)
-               status = cma_acquire_dev(id_priv);
+               status = cma_acquire_dev(id_priv, NULL);
 
        if (status) {
                if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2547,7 +2545,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
                if (ret)
                        goto err1;
 
-               ret = cma_acquire_dev(id_priv);
+               ret = cma_acquire_dev(id_priv, NULL);
                if (ret)
                        goto err1;
        }
index da06abd..a1e9cba 100644
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
        list_for_each_entry(client, &client_list, list) {
                if (client->index == index) {
                        if (op < 0 || op >= client->nops ||
-                           !client->cb_table[RDMA_NL_GET_OP(op)].dump)
+                           !client->cb_table[op].dump)
                                return -EINVAL;
 
                        {
index cde1e7b..faad2ca 100644
@@ -612,6 +612,7 @@ static ssize_t show_node_type(struct device *device,
        switch (dev->node_type) {
        case RDMA_NODE_IB_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
        case RDMA_NODE_RNIC:      return sprintf(buf, "%d: RNIC\n", dev->node_type);
+       case RDMA_NODE_USNIC:     return sprintf(buf, "%d: usNIC\n", dev->node_type);
        case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
        case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
        default:                  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
index b0f189b..ab8b1c3 100644
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 static unsigned int max_backlog = 1024;
 
 static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
        {
                .procname       = "max_backlog",
                .data           = &max_backlog,
@@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
                        goto out;
                }
                ctx->backlog--;
-       } else if (!ctx->uid) {
+       } else if (!ctx->uid || ctx->cm_id != cm_id) {
                /*
                 * We ignore events for new connections until userspace has set
                 * their context.  This can only happen if an error occurs on a
index d8f9c6c..bdc842e 100644
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)                      \
+       do {                                                            \
+               (udata)->inbuf  = (void __user *) (ibuf);               \
+               (udata)->outbuf = (void __user *) (obuf);               \
+               (udata)->inlen  = (ilen);                               \
+               (udata)->outlen = (olen);                               \
+       } while (0)
+
 /*
  * Our lifetime rules for these structs are the following:
  *
@@ -178,6 +186,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+struct ib_uverbs_flow_spec {
+       union {
+               union {
+                       struct ib_uverbs_flow_spec_hdr hdr;
+                       struct {
+                               __u32 type;
+                               __u16 size;
+                               __u16 reserved;
+                       };
+               };
+               struct ib_uverbs_flow_spec_eth     eth;
+               struct ib_uverbs_flow_spec_ipv4    ipv4;
+               struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+       };
+};
+
 #define IB_UVERBS_DECLARE_CMD(name)                                    \
        ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
                                 const char __user *buf, int in_len,    \
@@ -217,9 +241,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xsrq);
 IB_UVERBS_DECLARE_CMD(open_xrcd);
 IB_UVERBS_DECLARE_CMD(close_xrcd);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-IB_UVERBS_DECLARE_CMD(create_flow);
-IB_UVERBS_DECLARE_CMD(destroy_flow);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+#define IB_UVERBS_DECLARE_EX_CMD(name)                         \
+       int ib_uverbs_ex_##name(struct ib_uverbs_file *file,    \
+                               struct ib_udata *ucore,         \
+                               struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 
 #endif /* UVERBS_H */
index 2f0f01b..65f6e7d 100644
@@ -54,17 +54,7 @@ static struct uverbs_lock_class qp_lock_class        = { .name = "QP-uobj" };
 static struct uverbs_lock_class ah_lock_class  = { .name = "AH-uobj" };
 static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)                      \
-       do {                                                            \
-               (udata)->inbuf  = (void __user *) (ibuf);               \
-               (udata)->outbuf = (void __user *) (obuf);               \
-               (udata)->inlen  = (ilen);                               \
-               (udata)->outlen = (olen);                               \
-       } while (0)
 
 /*
  * The ib_uobject locking scheme is as follows:
@@ -939,13 +929,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
                return -EINVAL;
 
-       /*
-        * Local write permission is required if remote write or
-        * remote atomic permission is also requested.
-        */
-       if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
-           !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
-               return -EINVAL;
+       ret = ib_check_mr_access(cmd.access_flags);
+       if (ret)
+               return ret;
 
        uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
        if (!uobj)
@@ -2128,6 +2114,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                        }
                        next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
                        next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+                       if (next->opcode == IB_WR_SEND_WITH_IMM)
+                               next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
                } else {
                        switch (next->opcode) {
                        case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2601,8 +2590,7 @@ out_put:
        return ret ? ret : in_len;
 }
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
                                union ib_flow_spec *ib_spec)
 {
        ib_spec->type = kern_spec->type;
@@ -2642,28 +2630,31 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
        return 0;
 }
 
-ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
-                             const char __user *buf, int in_len,
-                             int out_len)
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+                            struct ib_udata *ucore,
+                            struct ib_udata *uhw)
 {
        struct ib_uverbs_create_flow      cmd;
        struct ib_uverbs_create_flow_resp resp;
        struct ib_uobject                 *uobj;
        struct ib_flow                    *flow_id;
-       struct ib_kern_flow_attr          *kern_flow_attr;
+       struct ib_uverbs_flow_attr        *kern_flow_attr;
        struct ib_flow_attr               *flow_attr;
        struct ib_qp                      *qp;
        int err = 0;
        void *kern_spec;
        void *ib_spec;
        int i;
-       int kern_attr_size;
 
-       if (out_len < sizeof(resp))
+       if (ucore->outlen < sizeof(resp))
                return -ENOSPC;
 
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
+       err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       if (err)
+               return err;
+
+       ucore->inbuf += sizeof(cmd);
+       ucore->inlen -= sizeof(cmd);
 
        if (cmd.comp_mask)
                return -EINVAL;
@@ -2672,32 +2663,27 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
             !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
                return -EPERM;
 
-       if (cmd.flow_attr.num_of_specs < 0 ||
-           cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+       if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
                return -EINVAL;
 
-       kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
-                        sizeof(struct ib_uverbs_cmd_hdr_ex);
-
-       if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
-           kern_attr_size < 0 || kern_attr_size >
-           (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+       if (cmd.flow_attr.size > ucore->inlen ||
+           cmd.flow_attr.size >
+           (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
                return -EINVAL;
 
        if (cmd.flow_attr.num_of_specs) {
-               kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+               kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+                                        GFP_KERNEL);
                if (!kern_flow_attr)
                        return -ENOMEM;
 
                memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
-               if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
-                                  kern_attr_size)) {
-                       err = -EFAULT;
+               err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+                                        cmd.flow_attr.size);
+               if (err)
                        goto err_free_attr;
-               }
        } else {
                kern_flow_attr = &cmd.flow_attr;
-               kern_attr_size = sizeof(cmd.flow_attr);
        }
 
        uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
@@ -2714,7 +2700,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
                goto err_uobj;
        }
 
-       flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+       flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
        if (!flow_attr) {
                err = -ENOMEM;
                goto err_put;
@@ -2729,19 +2715,22 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 
        kern_spec = kern_flow_attr + 1;
        ib_spec = flow_attr + 1;
-       for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+       for (i = 0; i < flow_attr->num_of_specs &&
+            cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+            cmd.flow_attr.size >=
+            ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
                err = kern_spec_to_ib_spec(kern_spec, ib_spec);
                if (err)
                        goto err_free;
                flow_attr->size +=
                        ((union ib_flow_spec *) ib_spec)->size;
-               kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
-               kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+               cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+               kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
                ib_spec += ((union ib_flow_spec *) ib_spec)->size;
        }
-       if (kern_attr_size) {
-               pr_warn("create flow failed, %d bytes left from uverb cmd\n",
-                       kern_attr_size);
+       if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+               pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+                       i, cmd.flow_attr.size);
                goto err_free;
        }
        flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2760,11 +2749,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
        memset(&resp, 0, sizeof(resp));
        resp.flow_handle = uobj->id;
 
-       if (copy_to_user((void __user *)(unsigned long) cmd.response,
-                        &resp, sizeof(resp))) {
-               err = -EFAULT;
+       err = ib_copy_to_udata(ucore,
+                              &resp, sizeof(resp));
+       if (err)
                goto err_copy;
-       }
 
        put_qp_read(qp);
        mutex_lock(&file->mutex);
@@ -2777,7 +2765,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
        kfree(flow_attr);
        if (cmd.flow_attr.num_of_specs)
                kfree(kern_flow_attr);
-       return in_len;
+       return 0;
 err_copy:
        idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
@@ -2794,16 +2782,18 @@ err_free_attr:
        return err;
 }
 
-ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
-                              const char __user *buf, int in_len,
-                              int out_len) {
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+                             struct ib_udata *ucore,
+                             struct ib_udata *uhw)
+{
        struct ib_uverbs_destroy_flow   cmd;
        struct ib_flow                  *flow_id;
        struct ib_uobject               *uobj;
        int                             ret;
 
-       if (copy_from_user(&cmd, buf, sizeof(cmd)))
-               return -EFAULT;
+       ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+       if (ret)
+               return ret;
 
        uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
                              file->ucontext);
@@ -2825,9 +2815,8 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
 
        put_uobj(uobj);
 
-       return ret ? ret : in_len;
+       return ret;
 }
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
                                struct ib_uverbs_create_xsrq *cmd,
index 2df31f6..3438694 100644
@@ -115,10 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
        [IB_USER_VERBS_CMD_CLOSE_XRCD]          = ib_uverbs_close_xrcd,
        [IB_USER_VERBS_CMD_CREATE_XSRQ]         = ib_uverbs_create_xsrq,
        [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-       [IB_USER_VERBS_CMD_CREATE_FLOW]         = ib_uverbs_create_flow,
-       [IB_USER_VERBS_CMD_DESTROY_FLOW]        = ib_uverbs_destroy_flow
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+                                   struct ib_udata *ucore,
+                                   struct ib_udata *uhw) = {
+       [IB_USER_VERBS_EX_CMD_CREATE_FLOW]      = ib_uverbs_ex_create_flow,
+       [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -589,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 {
        struct ib_uverbs_file *file = filp->private_data;
        struct ib_uverbs_cmd_hdr hdr;
+       __u32 flags;
 
        if (count < sizeof hdr)
                return -EINVAL;
@@ -596,45 +600,105 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof hdr))
                return -EFAULT;
 
-       if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-           !uverbs_cmd_table[hdr.command])
-               return -EINVAL;
+       flags = (hdr.command &
+                IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
 
-       if (!file->ucontext &&
-           hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
-               return -EINVAL;
+       if (!flags) {
+               __u32 command;
 
-       if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
-               return -ENOSYS;
+               if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+                                          IB_USER_VERBS_CMD_COMMAND_MASK))
+                       return -EINVAL;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-       if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
-               struct ib_uverbs_cmd_hdr_ex hdr_ex;
+               command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
 
-               if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
-                       return -EFAULT;
+               if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+                   !uverbs_cmd_table[command])
+                       return -EINVAL;
 
-               if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+               if (!file->ucontext &&
+                   command != IB_USER_VERBS_CMD_GET_CONTEXT)
                        return -EINVAL;
 
-               return uverbs_cmd_table[hdr.command](file,
-                                                    buf + sizeof(hdr_ex),
-                                                    (hdr_ex.in_words +
-                                                     hdr_ex.provider_in_words) * 4,
-                                                    (hdr_ex.out_words +
-                                                     hdr_ex.provider_out_words) * 4);
-       } else {
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+               if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+                       return -ENOSYS;
+
                if (hdr.in_words * 4 != count)
                        return -EINVAL;
 
-               return uverbs_cmd_table[hdr.command](file,
-                                                    buf + sizeof(hdr),
-                                                    hdr.in_words * 4,
-                                                    hdr.out_words * 4);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
+               return uverbs_cmd_table[command](file,
+                                                buf + sizeof(hdr),
+                                                hdr.in_words * 4,
+                                                hdr.out_words * 4);
+
+       } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+               __u32 command;
+
+               struct ib_uverbs_ex_cmd_hdr ex_hdr;
+               struct ib_udata ucore;
+               struct ib_udata uhw;
+               int err;
+               size_t written_count = count;
+
+               if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+                                          IB_USER_VERBS_CMD_COMMAND_MASK))
+                       return -EINVAL;
+
+               command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+               if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+                   !uverbs_ex_cmd_table[command])
+                       return -ENOSYS;
+
+               if (!file->ucontext)
+                       return -EINVAL;
+
+               if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+                       return -ENOSYS;
+
+               if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+                       return -EINVAL;
+
+               if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+                       return -EFAULT;
+
+               count -= sizeof(hdr) + sizeof(ex_hdr);
+               buf += sizeof(hdr) + sizeof(ex_hdr);
+
+               if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+                       return -EINVAL;
+
+               if (ex_hdr.response) {
+                       if (!hdr.out_words && !ex_hdr.provider_out_words)
+                               return -EINVAL;
+               } else {
+                       if (hdr.out_words || ex_hdr.provider_out_words)
+                               return -EINVAL;
+               }
+
+               INIT_UDATA(&ucore,
+                          (hdr.in_words) ? buf : 0,
+                          (unsigned long)ex_hdr.response,
+                          hdr.in_words * 8,
+                          hdr.out_words * 8);
+
+               INIT_UDATA(&uhw,
+                          (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+                          (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
+                          ex_hdr.provider_in_words * 8,
+                          ex_hdr.provider_out_words * 8);
+
+               err = uverbs_ex_cmd_table[command](file,
+                                                  &ucore,
+                                                  &uhw);
+
+               if (err)
+                       return err;
+
+               return written_count;
        }
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+       return -ENOSYS;
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
index a321df2..d4f6ddf 100644
@@ -114,6 +114,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
                return RDMA_TRANSPORT_IB;
        case RDMA_NODE_RNIC:
                return RDMA_TRANSPORT_IWARP;
+       case RDMA_NODE_USNIC:
+               return RDMA_TRANSPORT_USNIC;
        default:
                BUG();
                return 0;
@@ -130,6 +132,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
        case RDMA_TRANSPORT_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case RDMA_TRANSPORT_IWARP:
+       case RDMA_TRANSPORT_USNIC:
                return IB_LINK_LAYER_ETHERNET;
        default:
                return IB_LINK_LAYER_UNSPECIFIED;
@@ -958,6 +961,11 @@ EXPORT_SYMBOL(ib_resize_cq);
 struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 {
        struct ib_mr *mr;
+       int err;
+
+       err = ib_check_mr_access(mr_access_flags);
+       if (err)
+               return ERR_PTR(err);
 
        mr = pd->device->get_dma_mr(pd, mr_access_flags);
 
@@ -980,6 +988,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
                             u64 *iova_start)
 {
        struct ib_mr *mr;
+       int err;
+
+       err = ib_check_mr_access(mr_access_flags);
+       if (err)
+               return ERR_PTR(err);
 
        if (!pd->device->reg_phys_mr)
                return ERR_PTR(-ENOSYS);
@@ -1010,6 +1023,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
        struct ib_pd *old_pd;
        int ret;
 
+       ret = ib_check_mr_access(mr_access_flags);
+       if (ret)
+               return ret;
+
        if (!mr->device->rereg_phys_mr)
                return -ENOSYS;
 
index 33d2cc6..4a03385 100644
@@ -602,10 +602,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
             rdev->lldi.vr->qp.size,
             rdev->lldi.vr->cq.start,
             rdev->lldi.vr->cq.size);
-       PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+       PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
             "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-            (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+            (u64)pci_resource_start(rdev->lldi.pdev, 2),
             rdev->lldi.db_reg,
             rdev->lldi.gts_reg,
             rdev->qpshift, rdev->qpmask,
index f5cb13b..cc04b7b 100644
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
        int j;
        int ret;
 
-       ret = get_user_pages(current, current->mm, addr,
-                            npages, 0, 1, pages, NULL);
-
+       ret = get_user_pages_fast(addr, npages, 0, pages);
        if (ret != npages) {
                int i;
 
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
        while (dim) {
                const int mxp = 8;
 
-               down_write(&current->mm->mmap_sem);
                ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-               up_write(&current->mm->mmap_sem);
-
                if (ret <= 0)
                        goto done_unlock;
                else {
index d5e60f4..66dbf80 100644
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
        u32 i;
 
        i = cq->mcq.cons_index;
-       while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+       while (get_sw_cqe(cq, i))
                ++i;
 
        return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 
        mutex_lock(&cq->resize_mutex);
 
-       if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+       if (entries < 1) {
                err = -EINVAL;
                goto out;
        }
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
                goto out;
        }
 
+       if (entries > dev->dev->caps.max_cqes) {
+               err = -EINVAL;
+               goto out;
+       }
+
        if (ibcq->uobject) {
                err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
                if (err)
index f061264..1aad9b3 100644
@@ -1691,11 +1691,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
                ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-               ibdev->ib_dev.uverbs_cmd_mask   |=
-                       (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
-                       (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+               ibdev->ib_dev.uverbs_ex_cmd_mask        |=
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
        }
 
        mlx4_ib_alloc_eqs(dev, ibdev);
index 344ab03..b726274 100644
@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                goto err_db;
        }
        mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-       (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
        *index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        }
        mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-       (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+       (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        *index = dev->mdev.priv.uuari.uars[0].index;
 
        return 0;
@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
        int eqn;
        int err;
 
+       if (entries < 0)
+               return ERR_PTR(-EINVAL);
+
        entries = roundup_pow_of_two(entries + 1);
-       if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+       if (entries > dev->mdev.caps.max_cqes)
                return ERR_PTR(-EINVAL);
 
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
        return 0;
 }
 
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
-                       u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
 {
-       u32 lrsn;
-
-       if (srq)
-               lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
-       else
-               lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
-       return rsn == lrsn;
+       return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
 }
 
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
        while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
                cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
-               if (is_equal_rsn(cqe64, srq, rsn)) {
-                       if (srq)
+               if (is_equal_rsn(cqe64, rsn)) {
+                       if (srq && (ntohl(cqe64->srqn) & 0xffffff))
                                mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
                        ++nfreed;
                } else if (nfreed) {
index b1a6cb3..3065341 100644
@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;
 
-       err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+       err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+                                   NULL, NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
                goto err_in;
index 836be91..4c134d9 100644
@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
        int                     npages;
        struct completion       done;
        enum ib_wc_status       status;
+       struct mlx5_ib_dev     *dev;
+       struct mlx5_create_mkey_mbox_out out;
+       unsigned long           start;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
        struct mlx5_ib_dev     *dev;
        struct work_struct      work;
        struct delayed_work     dwork;
+       int                     pending;
 };
 
 struct mlx5_mr_cache {
@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
        spinlock_t                      mr_lock;
        struct mlx5_ib_resources        devr;
        struct mlx5_mr_cache            cache;
+       struct timer_list               delay_timer;
+       int                             fill_delay;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
index 3453580..039c3e4 100644
 #include <linux/random.h>
 #include <linux/debugfs.h>
 #include <linux/export.h>
+#include <linux/delay.h>
 #include <rdma/ib_umem.h>
 #include "mlx5_ib.h"
 
 enum {
-       DEF_CACHE_SIZE  = 10,
+       MAX_PENDING_REG_MR = 8,
 };
 
 enum {
@@ -63,6 +64,51 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
                return order - cache->ent[0].order;
 }
 
+static void reg_mr_callback(int status, void *context)
+{
+       struct mlx5_ib_mr *mr = context;
+       struct mlx5_ib_dev *dev = mr->dev;
+       struct mlx5_mr_cache *cache = &dev->cache;
+       int c = order2idx(dev, mr->order);
+       struct mlx5_cache_ent *ent = &cache->ent[c];
+       u8 key;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ent->lock, flags);
+       ent->pending--;
+       spin_unlock_irqrestore(&ent->lock, flags);
+       if (status) {
+               mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+               kfree(mr);
+               dev->fill_delay = 1;
+               mod_timer(&dev->delay_timer, jiffies + HZ);
+               return;
+       }
+
+       if (mr->out.hdr.status) {
+               mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+                            mr->out.hdr.status,
+                            be32_to_cpu(mr->out.hdr.syndrome));
+               kfree(mr);
+               dev->fill_delay = 1;
+               mod_timer(&dev->delay_timer, jiffies + HZ);
+               return;
+       }
+
+       spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+       key = dev->mdev.priv.mkey_key++;
+       spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+       mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+       cache->last_add = jiffies;
+
+       spin_lock_irqsave(&ent->lock, flags);
+       list_add_tail(&mr->list, &ent->head);
+       ent->cur++;
+       ent->size++;
+       spin_unlock_irqrestore(&ent->lock, flags);
+}
+
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
@@ -78,36 +124,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                return -ENOMEM;
 
        for (i = 0; i < num; i++) {
+               if (ent->pending >= MAX_PENDING_REG_MR) {
+                       err = -EAGAIN;
+                       break;
+               }
+
                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
-                       goto out;
+                       break;
                }
                mr->order = ent->order;
                mr->umred = 1;
+               mr->dev = dev;
                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;
 
+               spin_lock_irq(&ent->lock);
+               ent->pending++;
+               spin_unlock_irq(&ent->lock);
+               mr->start = jiffies;
                err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
-                                           sizeof(*in));
+                                           sizeof(*in), reg_mr_callback,
+                                           mr, &mr->out);
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        kfree(mr);
-                       goto out;
+                       break;
                }
-               cache->last_add = jiffies;
-
-               spin_lock(&ent->lock);
-               list_add_tail(&mr->list, &ent->head);
-               ent->cur++;
-               ent->size++;
-               spin_unlock(&ent->lock);
        }
 
-out:
        kfree(in);
        return err;
 }
@@ -121,16 +170,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
        int i;
 
        for (i = 0; i < num; i++) {
-               spin_lock(&ent->lock);
+               spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
-                       spin_unlock(&ent->lock);
+                       spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
-               spin_unlock(&ent->lock);
+               spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -162,9 +211,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
                return -EINVAL;
 
        if (var > ent->size) {
-               err = add_keys(dev, c, var - ent->size);
-               if (err)
-                       return err;
+               do {
+                       err = add_keys(dev, c, var - ent->size);
+                       if (err && err != -EAGAIN)
+                               return err;
+
+                       usleep_range(3000, 5000);
+               } while (err);
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }
@@ -280,23 +333,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);
+       int err;
 
        if (cache->stopped)
                return;
 
        ent = &dev->cache.ent[i];
-       if (ent->cur < 2 * ent->limit) {
-               add_keys(dev, i, 1);
-               if (ent->cur < 2 * ent->limit)
-                       queue_work(cache->wq, &ent->work);
+       if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+               err = add_keys(dev, i, 1);
+               if (ent->cur < 2 * ent->limit) {
+                       if (err == -EAGAIN) {
+                               mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+                                           i + 2);
+                               queue_delayed_work(cache->wq, &ent->dwork,
+                                                  msecs_to_jiffies(3));
+                       } else if (err) {
+                               mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+                                            i + 2, err);
+                               queue_delayed_work(cache->wq, &ent->dwork,
+                                                  msecs_to_jiffies(1000));
+                       } else {
+                               queue_work(cache->wq, &ent->work);
+                       }
+               }
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
-                   time_after(jiffies, cache->last_add + 60 * HZ)) {
+                   time_after(jiffies, cache->last_add + 300 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
-                       queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+                       queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
                }
        }
 }
@@ -336,18 +403,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 
                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 
-               spin_lock(&ent->lock);
+               spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
-                       spin_unlock(&ent->lock);
+                       spin_unlock_irq(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
-               spin_unlock(&ent->lock);
+               spin_unlock_irq(&ent->lock);
 
                queue_work(cache->wq, &ent->work);
 
@@ -374,12 +441,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                return;
        }
        ent = &cache->ent[c];
-       spin_lock(&ent->lock);
+       spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
-       spin_unlock(&ent->lock);
+       spin_unlock_irq(&ent->lock);
 
        if (shrink)
                queue_work(cache->wq, &ent->work);
@@ -394,16 +461,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
        cancel_delayed_work(&ent->dwork);
        while (1) {
-               spin_lock(&ent->lock);
+               spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
-                       spin_unlock(&ent->lock);
+                       spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
-               spin_unlock(&ent->lock);
+               spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -464,12 +531,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
        debugfs_remove_recursive(dev->cache.root);
 }
 
+static void delay_time_func(unsigned long ctx)
+{
+       struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+       dev->fill_delay = 0;
+}
+
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
-       int size;
        int err;
        int i;
 
@@ -479,6 +552,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                return -ENOMEM;
        }
 
+       setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                INIT_LIST_HEAD(&cache->ent[i].head);
                spin_lock_init(&cache->ent[i].lock);
@@ -489,13 +563,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                ent->order = i + 2;
                ent->dev = dev;
 
-               if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
-                       size = dev->mdev.profile->mr_cache[i].size;
+               if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
                        limit = dev->mdev.profile->mr_cache[i].limit;
-               } else {
-                       size = DEF_CACHE_SIZE;
+               else
                        limit = 0;
-               }
+
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
@@ -522,6 +594,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
                clean_keys(dev, i);
 
        destroy_workqueue(dev->cache.wq);
+       del_timer_sync(&dev->delay_timer);
 
        return 0;
 }
@@ -551,7 +624,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;
 
-       err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+       err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+                                   NULL);
        if (err)
                goto err_in;
 
@@ -660,14 +734,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        int err;
        int i;
 
-       for (i = 0; i < 10; i++) {
+       for (i = 0; i < 1; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;
 
                err = add_keys(dev, order2idx(dev, order), 1);
-               if (err) {
-                       mlx5_ib_warn(dev, "add_keys failed\n");
+               if (err && err != -EAGAIN) {
+                       mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
                        break;
                }
        }
@@ -759,8 +833,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-       in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
-       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+       in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+                                                        1 << page_shift));
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+                                   NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
@@ -944,7 +1020,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
         * TBD not needed - issue 197292 */
        in->seg.log2_page_size = PAGE_SHIFT;
 
-       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+                                   NULL, NULL);
        kfree(in);
        if (err)
                goto err_free;
index 5659ea8..7c6b4ba 100644
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        }
        mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
        (*in)->ctx.log_pg_sz_remote_qpn =
-               cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+               cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
        (*in)->ctx.params2 = cpu_to_be32(offset << 6);
 
        (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
                goto err_buf;
        }
        (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-       (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+       (*in)->ctx.log_pg_sz_remote_qpn =
+               cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
        /* Set "fast registration enabled" for all kernel QPs */
        (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
        (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -1317,9 +1318,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
                                          MLX5_QP_OPTPAR_RAE            |
                                          MLX5_QP_OPTPAR_RWE            |
                                          MLX5_QP_OPTPAR_RNR_TIMEOUT    |
-                                         MLX5_QP_OPTPAR_PM_STATE,
+                                         MLX5_QP_OPTPAR_PM_STATE       |
+                                         MLX5_QP_OPTPAR_ALT_ADDR_PATH,
                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
-                                         MLX5_QP_OPTPAR_PM_STATE,
+                                         MLX5_QP_OPTPAR_PM_STATE       |
+                                         MLX5_QP_OPTPAR_ALT_ADDR_PATH,
                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
                                          MLX5_QP_OPTPAR_SRQN           |
                                          MLX5_QP_OPTPAR_CQN_RCV,
@@ -1550,7 +1553,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        mlx5_cur = to_mlx5_state(cur_state);
        mlx5_new = to_mlx5_state(new_state);
        mlx5_st = to_mlx5_st(ibqp->qp_type);
-       if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
+       if (mlx5_st < 0)
                goto out;
 
        optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1744,6 +1747,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
                        MLX5_MKEY_MASK_PD               |
                        MLX5_MKEY_MASK_LR               |
                        MLX5_MKEY_MASK_LW               |
+                       MLX5_MKEY_MASK_KEY              |
                        MLX5_MKEY_MASK_RR               |
                        MLX5_MKEY_MASK_RW               |
                        MLX5_MKEY_MASK_A                |
@@ -1800,7 +1804,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
        seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
        seg->len = cpu_to_be64(wr->wr.fast_reg.length);
        seg->log2_page_size = wr->wr.fast_reg.page_shift;
-       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+                                      mlx5_mkey_variant(wr->wr.fast_reg.rkey));
 }
 
 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1913,6 +1918,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
        if (unlikely((*seg == qp->sq.qend)))
                *seg = mlx5_get_send_wqe(qp, 0);
        if (!li) {
+               if (unlikely(wr->wr.fast_reg.page_list_len >
+                            wr->wr.fast_reg.page_list->max_page_list_len))
+                       return  -ENOMEM;
+
                set_frwr_pages(*seg, wr, mdev, pd, writ);
                *seg += sizeof(struct mlx5_wqe_data_seg);
                *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
index 0aa478b..210b3ea 100644 (file)
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                goto err_in;
        }
 
-       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
        return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        }
        srq->wq_sig = !!srq_signature;
 
-       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
        return 0;
 
@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
                mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
                ib_umem_release(msrq->umem);
        } else {
-               kfree(msrq->wrid);
-               mlx5_buf_free(&dev->mdev, &msrq->buf);
-               mlx5_db_free(&dev->mdev, &msrq->db);
+               destroy_srq_kernel(dev, msrq);
        }
 
        kfree(srq);
index 5b53ca5..8308e36 100644 (file)
@@ -2834,7 +2834,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        init_attr->qp_context = nesqp->ibqp.qp_context;
        init_attr->send_cq = nesqp->ibqp.send_cq;
        init_attr->recv_cq = nesqp->ibqp.recv_cq;
-       init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq;
+       init_attr->srq = nesqp->ibqp.srq;
        init_attr->cap = attr->cap;
 
        return 0;
index adc11d1..294dd27 100644 (file)
@@ -122,6 +122,32 @@ struct mqe_ctx {
        bool cmd_done;
 };
 
+struct ocrdma_hw_mr {
+       u32 lkey;
+       u8 fr_mr;
+       u8 remote_atomic;
+       u8 remote_rd;
+       u8 remote_wr;
+       u8 local_rd;
+       u8 local_wr;
+       u8 mw_bind;
+       u8 rsvd;
+       u64 len;
+       struct ocrdma_pbl *pbl_table;
+       u32 num_pbls;
+       u32 num_pbes;
+       u32 pbl_size;
+       u32 pbe_size;
+       u64 fbo;
+       u64 va;
+};
+
+struct ocrdma_mr {
+       struct ib_mr ibmr;
+       struct ib_umem *umem;
+       struct ocrdma_hw_mr hwmr;
+};
+
 struct ocrdma_dev {
        struct ib_device ibdev;
        struct ocrdma_dev_attr attr;
@@ -169,7 +195,7 @@ struct ocrdma_dev {
        struct list_head entry;
        struct rcu_head rcu;
        int id;
-       u64 stag_arr[OCRDMA_MAX_STAG];
+       struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
        u16 pvid;
 };
 
@@ -294,31 +320,6 @@ struct ocrdma_qp {
        u16 db_cache;
 };
 
-struct ocrdma_hw_mr {
-       u32 lkey;
-       u8 fr_mr;
-       u8 remote_atomic;
-       u8 remote_rd;
-       u8 remote_wr;
-       u8 local_rd;
-       u8 local_wr;
-       u8 mw_bind;
-       u8 rsvd;
-       u64 len;
-       struct ocrdma_pbl *pbl_table;
-       u32 num_pbls;
-       u32 num_pbes;
-       u32 pbl_size;
-       u32 pbe_size;
-       u64 fbo;
-       u64 va;
-};
-
-struct ocrdma_mr {
-       struct ib_mr ibmr;
-       struct ib_umem *umem;
-       struct ocrdma_hw_mr hwmr;
-};
 
 struct ocrdma_ucontext {
        struct ib_ucontext ibucontext;
index 50219ab..56bf32f 100644 (file)
@@ -1783,7 +1783,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
        u32 max_sges = attrs->cap.max_send_sge;
 
        /* QP1 may exceed 127 */
-       max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+       max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
                                dev->attr.max_wqe);
 
        status = ocrdma_build_q_conf(&max_wqe_allocated,
index 0ce7674..91443bc 100644 (file)
@@ -452,9 +452,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
 {
        struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
 
-       ocrdma_free_resources(dev);
-       ocrdma_cleanup_hw(dev);
-
        idr_remove(&ocrdma_dev_id, dev->id);
        kfree(dev->mbx_cmd);
        ib_dealloc_device(&dev->ibdev);
@@ -470,6 +467,10 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
        spin_lock(&ocrdma_devlist_lock);
        list_del_rcu(&dev->entry);
        spin_unlock(&ocrdma_devlist_lock);
+
+       ocrdma_free_resources(dev);
+       ocrdma_cleanup_hw(dev);
+
        call_rcu(&dev->rcu, ocrdma_remove_free);
 }
 
index 69f1d12..7686dce 100644 (file)
@@ -1981,9 +1981,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
 
        wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
 
-       if ((wr->wr.fast_reg.page_list_len >
-               qp->dev->attr.max_pages_per_frmr) ||
-               (wr->wr.fast_reg.length > 0xffffffffULL))
+       if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
                return -EINVAL;
 
        hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2839,7 +2837,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
                goto mbx_err;
        mr->ibmr.rkey = mr->hwmr.lkey;
        mr->ibmr.lkey = mr->hwmr.lkey;
-       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
        return &mr->ibmr;
 mbx_err:
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
index 016e742..5bfc02f 100644 (file)
@@ -6190,21 +6190,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
 {
        struct qib_devdata *dd;
        unsigned long val;
-       int ret;
-
+       char *n;
        if (strlen(str) >= MAX_ATTEN_LEN) {
                pr_info("txselect_values string too long\n");
                return -ENOSPC;
        }
-       ret = kstrtoul(str, 0, &val);
-       if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+       val = simple_strtoul(str, &n, 0);
+       if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
                                TXDDS_MFG_SZ)) {
                pr_info("txselect_values must start with a number < %d\n",
                        TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
-               return ret ? ret : -EINVAL;
+               return -EINVAL;
        }
-
        strcpy(txselect_list, str);
+
        list_for_each_entry(dd, &qib_dev_list, list)
                if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
                        set_no_qsfp_atten(dd, 1);
index 28874f8..941d4d5 100644 (file)
@@ -54,7 +54,7 @@ struct ib_node_info {
        __be32 revision;
        u8 local_port_num;
        u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
 
 struct ib_mad_notice_attr {
        u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
                        __be16  reserved;
                        __be16  lid;            /* where violation happened */
                        u8      port_num;       /* where violation happened */
-               } __attribute__ ((packed)) ntc_129_131;
+               } __packed ntc_129_131;
 
                struct {
                        __be16  reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
                        __be32  new_cap_mask;   /* new capability mask */
                        u8      reserved3;
                        u8      change_flags;   /* low 3 bits only */
-               } __attribute__ ((packed)) ntc_144;
+               } __packed ntc_144;
 
                struct {
                        __be16  reserved;
                        __be16  lid;            /* lid where sys guid changed */
                        __be16  reserved2;
                        __be64  new_sys_guid;
-               } __attribute__ ((packed)) ntc_145;
+               } __packed ntc_145;
 
                struct {
                        __be16  reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
                        u8      reserved3;
                        u8      dr_trunc_hop;
                        u8      dr_rtn_path[30];
-               } __attribute__ ((packed)) ntc_256;
+               } __packed ntc_256;
 
                struct {
                        __be16          reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
                        __be32          qp2;    /* high 8 bits reserved */
                        union ib_gid    gid1;
                        union ib_gid    gid2;
-               } __attribute__ ((packed)) ntc_257_258;
+               } __packed ntc_257_258;
 
        } details;
 };
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
        __be64 port_rcv_packets;
        __be64 port_xmit_wait;
        __be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
 
 #define IB_PMA_CONG_HW_CONTROL_TIMER            0x00
 #define IB_PMA_CONG_HW_CONTROL_SAMPLE           0x01
index d0a0ea0..165aee2 100644 (file)
@@ -594,8 +594,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
                else
                        j = npages;
 
-               ret = get_user_pages(current, current->mm, addr,
-                            j, 0, 1, pages, NULL);
+               ret = get_user_pages_fast(addr, j, 0, pages);
                if (ret != j) {
                        i = 0;
                        j = ret;
@@ -1294,11 +1293,8 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
                int mxp = 8;
                int ndesc = 0;
 
-               down_write(&current->mm->mmap_sem);
                ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
                                iov, dim, &list, &mxp, &ndesc);
-               up_write(&current->mm->mmap_sem);
-
                if (ret < 0)
                        goto done_unlock;
                else {
index 012e2c7..a01c7d2 100644 (file)
@@ -150,14 +150,14 @@ struct ib_reth {
        __be64 vaddr;
        __be32 rkey;
        __be32 length;
-} __attribute__ ((packed));
+} __packed;
 
 struct ib_atomic_eth {
        __be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
        __be32 rkey;
        __be64 swap_data;
        __be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
 
 struct qib_other_headers {
        __be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
                __be32 aeth;
                struct ib_atomic_eth atomic_eth;
        } u;
-} __attribute__ ((packed));
+} __packed;
 
 /*
  * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
                } l;
                struct qib_other_headers oth;
        } u;
-} __attribute__ ((packed));
+} __packed;
 
 struct qib_pio_header {
        __le32 pbc[2];
        struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
 
 /*
  * There is one struct qib_mcast for each multicast GID.
index eb71aaa..c639f90 100644 (file)
@@ -101,6 +101,7 @@ enum {
        IPOIB_MCAST_FLAG_SENDONLY = 1,
        IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,
+       IPOIB_MCAST_JOIN_STARTED  = 4,
 
        MAX_SEND_CQE              = 16,
        IPOIB_CM_COPYBREAK        = 256,
@@ -151,6 +152,7 @@ struct ipoib_mcast {
        struct sk_buff_head pkt_queue;
 
        struct net_device *dev;
+       struct completion done;
 };
 
 struct ipoib_rx_buf {
@@ -299,7 +301,7 @@ struct ipoib_dev_priv {
 
        unsigned long flags;
 
-       struct mutex vlan_mutex;
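+       /* protects child_intfs: writers add/remove children, readers only walk the list */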
+       struct rw_semaphore vlan_rwsem;
 
        struct rb_root  path_tree;
        struct list_head path_list;
index 7a31754..1377f85 100644 (file)
@@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
 static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
                                             struct ipoib_cm_rx_buf *rx_ring,
                                             int id, int frags,
-                                            u64 mapping[IPOIB_CM_RX_SG])
+                                            u64 mapping[IPOIB_CM_RX_SG],
+                                            gfp_t gfp)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
@@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
        }
 
        for (i = 0; i < frags; i++) {
-               struct page *page = alloc_page(GFP_ATOMIC);
+               struct page *page = alloc_page(gfp);
 
                if (!page)
                        goto partial_error;
@@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 
        for (i = 0; i < ipoib_recvq_size; ++i) {
                if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
-                                          rx->rx_ring[i].mapping)) {
+                                          rx->rx_ring[i].mapping,
+                                          GFP_KERNEL)) {
                        ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                                ret = -ENOMEM;
                                goto err_count;
@@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
                                              (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
 
-       newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
+       newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
+                                      mapping, GFP_ATOMIC);
        if (unlikely(!newskb)) {
                /*
                 * If we can't allocate a new RX buffer, dump
@@ -1556,7 +1559,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
                for (i = 0; i < ipoib_recvq_size; ++i) {
                        if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
                                                   priv->cm.num_frags - 1,
-                                                  priv->cm.srq_ring[i].mapping)) {
+                                                  priv->cm.srq_ring[i].mapping,
+                                                  GFP_KERNEL)) {
                                ipoib_warn(priv, "failed to allocate "
                                           "receive buffer %d\n", i);
                                ipoib_cm_dev_cleanup(dev);
index 196b1d1..6a7003d 100644 (file)
@@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
        ret = ipoib_ib_post_receives(dev);
        if (ret) {
                ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-               ipoib_ib_dev_stop(dev, 1);
-               return -1;
+               goto dev_stop;
        }
 
        ret = ipoib_cm_dev_open(dev);
        if (ret) {
                ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
-               ipoib_ib_dev_stop(dev, 1);
-               return -1;
+               goto dev_stop;
        }
 
        clear_bit(IPOIB_STOP_REAPER, &priv->flags);
@@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)
                napi_enable(&priv->napi);
 
        return 0;
+dev_stop:
+       if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+               napi_enable(&priv->napi);
+       ipoib_ib_dev_stop(dev, 1);
+       return -1;
 }
 
 static void ipoib_pkey_dev_check_presence(struct net_device *dev)
@@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
        if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
                mutex_lock(&pkey_mutex);
                set_bit(IPOIB_PKEY_STOP, &priv->flags);
-               cancel_delayed_work(&priv->pkey_poll_task);
+               cancel_delayed_work_sync(&priv->pkey_poll_task);
                mutex_unlock(&pkey_mutex);
-               if (flush)
-                       flush_workqueue(ipoib_workqueue);
        }
 
        ipoib_mcast_stop_thread(dev, flush);
@@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        u16 new_index;
        int result;
 
-       mutex_lock(&priv->vlan_mutex);
+       down_read(&priv->vlan_rwsem);
 
        /*
         * Flush any child interfaces too -- they might be up even if
@@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        list_for_each_entry(cpriv, &priv->child_intfs, list)
                __ipoib_ib_dev_flush(cpriv, level);
 
-       mutex_unlock(&priv->vlan_mutex);
+       up_read(&priv->vlan_rwsem);
 
        if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
                /* for non-child devices must check/update the pkey value here */
@@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
        ipoib_dbg(priv, "cleaning up ib_dev\n");
+       /*
+        * We must make sure there are no more (path) completions
+        * that may wish to touch priv fields that are no longer valid
+        */
+       ipoib_flush_paths(dev);
 
        ipoib_mcast_stop_thread(dev, 1);
        ipoib_mcast_dev_flush(dev);
index 82cec1a..d64ed05 100644 (file)
@@ -119,7 +119,7 @@ int ipoib_open(struct net_device *dev)
                struct ipoib_dev_priv *cpriv;
 
                /* Bring up any child interfaces too */
-               mutex_lock(&priv->vlan_mutex);
+               down_read(&priv->vlan_rwsem);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;
 
@@ -129,7 +129,7 @@ int ipoib_open(struct net_device *dev)
 
                        dev_change_flags(cpriv->dev, flags | IFF_UP);
                }
-               mutex_unlock(&priv->vlan_mutex);
+               up_read(&priv->vlan_rwsem);
        }
 
        netif_start_queue(dev);
@@ -162,7 +162,7 @@ static int ipoib_stop(struct net_device *dev)
                struct ipoib_dev_priv *cpriv;
 
                /* Bring down any child interfaces too */
-               mutex_lock(&priv->vlan_mutex);
+               down_read(&priv->vlan_rwsem);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;
 
@@ -172,7 +172,7 @@ static int ipoib_stop(struct net_device *dev)
 
                        dev_change_flags(cpriv->dev, flags & ~IFF_UP);
                }
-               mutex_unlock(&priv->vlan_mutex);
+               up_read(&priv->vlan_rwsem);
        }
 
        return 0;
@@ -1350,7 +1350,7 @@ void ipoib_setup(struct net_device *dev)
 
        ipoib_set_ethtool_ops(dev);
 
-       netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
+       netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
 
        dev->watchdog_timeo      = HZ;
 
@@ -1372,7 +1372,7 @@ void ipoib_setup(struct net_device *dev)
 
        spin_lock_init(&priv->lock);
 
-       mutex_init(&priv->vlan_mutex);
+       init_rwsem(&priv->vlan_rwsem);
 
        INIT_LIST_HEAD(&priv->path_list);
        INIT_LIST_HEAD(&priv->child_intfs);
index cecb98a..d4e0057 100644 (file)
@@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,
                        mcast->mcmember.mgid.raw, status);
 
        /* We trap for port events ourselves. */
-       if (status == -ENETRESET)
-               return 0;
+       if (status == -ENETRESET) {
+               status = 0;
+               goto out;
+       }
 
        if (!status)
                status = ipoib_mcast_join_finish(mcast, &multicast->rec);
@@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,
                if (mcast == priv->broadcast)
                        queue_work(ipoib_workqueue, &priv->carrier_on_task);
 
-               return 0;
+               status = 0;
+               goto out;
        }
 
        if (mcast->logcount++ < 20) {
@@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,
                                   mcast->backoff * HZ);
        spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);
-
+out:
+       complete(&mcast->done);
        return status;
 }
 
@@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
        }
 
        set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
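+       /* completed when the join attempt finishes; ipoib_mcast_dev_flush() waits on it */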
+       init_completion(&mcast->done);
+       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
+
        mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
        if (IS_ERR(mcast->mc)) {
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+               complete(&mcast->done);
                ret = PTR_ERR(mcast->mc);
                ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
 
@@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, mcast_task.work);
        struct net_device *dev = priv->dev;
+       struct ib_port_attr port_attr;
 
        if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
                return;
 
+       if (ib_query_port(priv->ca, priv->port, &port_attr) ||
+           port_attr.state != IB_PORT_ACTIVE) {
+               ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
+                         port_attr.state);
+               return;
+       }
+
        if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
                ipoib_warn(priv, "ib_query_gid() failed\n");
        else
@@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       /* wait for pending join completions before leaving the groups */
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(dev, mcast);
                ipoib_mcast_free(mcast);
index f81abe1..c29b5c8 100644 (file)
@@ -142,10 +142,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
        priv = netdev_priv(dev);
        ppriv = netdev_priv(priv->parent);
 
-       mutex_lock(&ppriv->vlan_mutex);
+       down_write(&ppriv->vlan_rwsem);
        unregister_netdevice_queue(dev, head);
        list_del(&priv->list);
-       mutex_unlock(&ppriv->vlan_mutex);
+       up_write(&ppriv->vlan_rwsem);
 }
 
 static size_t ipoib_get_size(const struct net_device *dev)
index 8292554..9fad7b5 100644 (file)
@@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
        if (!rtnl_trylock())
                return restart_syscall();
 
-       mutex_lock(&ppriv->vlan_mutex);
+       down_write(&ppriv->vlan_rwsem);
 
        /*
         * First ensure this isn't a duplicate. We check the parent device and
@@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
        result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
 
 out:
-       mutex_unlock(&ppriv->vlan_mutex);
+       up_write(&ppriv->vlan_rwsem);
 
        if (result)
                free_netdev(priv->dev);
@@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
        if (!rtnl_trylock())
                return restart_syscall();
-       mutex_lock(&ppriv->vlan_mutex);
+
+       down_write(&ppriv->vlan_rwsem);
        list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
                if (priv->pkey == pkey &&
                    priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
                        break;
                }
        }
-       mutex_unlock(&ppriv->vlan_mutex);
+       up_write(&ppriv->vlan_rwsem);
+
        rtnl_unlock();
 
        if (dev) {
index 6ca3073..8675d26 100644 (file)
@@ -98,6 +98,7 @@ enum {
 static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
                                           struct mlx5_cmd_msg *in,
                                           struct mlx5_cmd_msg *out,
+                                          void *uout, int uout_size,
                                           mlx5_cmd_cbk_t cbk,
                                           void *context, int page_queue)
 {
@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
 
        ent->in         = in;
        ent->out        = out;
+       ent->uout       = uout;
+       ent->uout_size  = uout_size;
        ent->callback   = cbk;
        ent->context    = context;
        ent->cmd        = cmd;
@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
        ent->lay = lay;
        memset(lay, 0, sizeof(*lay));
        memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+       ent->op = be32_to_cpu(lay->in[0]) >> 16;
        if (ent->in->next)
                lay->in_ptr = cpu_to_be64(ent->in->next->dma);
        lay->inlen = cpu_to_be32(ent->in->len);
@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 *    2. page queue commands do not support asynchronous completion
  */
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
-                          struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
+                          struct mlx5_cmd_msg *out, void *uout, int uout_size,
+                          mlx5_cmd_cbk_t callback,
                           void *context, int page_queue, u8 *status)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        if (callback && page_queue)
                return -EINVAL;
 
-       ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
+       ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
+                       page_queue);
        if (IS_ERR(ent))
                return PTR_ERR(ent);
 
@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
                op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
                if (op < ARRAY_SIZE(cmd->stats)) {
                        stats = &cmd->stats[op];
-                       spin_lock(&stats->lock);
+                       spin_lock_irq(&stats->lock);
                        stats->sum += ds;
                        ++stats->n;
-                       spin_unlock(&stats->lock);
+                       spin_unlock_irq(&stats->lock);
                }
                mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
                                   "fw exec time for %s is %lld nsec\n",
@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
        int n;
        int i;
 
-       msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+       msg = kzalloc(sizeof(*msg), flags);
        if (!msg)
                return ERR_PTR(-ENOMEM);
 
@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
                up(&cmd->sem);
 }
 
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+       unsigned long flags;
+
+       if (msg->cache) {
+               spin_lock_irqsave(&msg->cache->lock, flags);
+               list_add_tail(&msg->list, &msg->cache->head);
+               spin_unlock_irqrestore(&msg->cache->lock, flags);
+       } else {
+               mlx5_free_cmd_msg(dev, msg);
+       }
+}
+
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
        void *context;
        int err;
        int i;
+       ktime_t t1, t2, delta;
+       s64 ds;
+       struct mlx5_cmd_stats *stats;
+       unsigned long flags;
 
        for (i = 0; i < (1 << cmd->log_sz); i++) {
                if (test_bit(i, &vector)) {
@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
                        }
                        free_ent(cmd, ent->idx);
                        if (ent->callback) {
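+                               /*
+                                * Async commands: fold the execution time into
+                                * the per-opcode stats, copy the result to the
+                                * caller's buffer and free the mailboxes before
+                                * invoking the callback.
+                                */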
+                               t1 = timespec_to_ktime(ent->ts1);
+                               t2 = timespec_to_ktime(ent->ts2);
+                               delta = ktime_sub(t2, t1);
+                               ds = ktime_to_ns(delta);
+                               if (ent->op < ARRAY_SIZE(cmd->stats)) {
+                                       stats = &cmd->stats[ent->op];
+                                       spin_lock_irqsave(&stats->lock, flags);
+                                       stats->sum += ds;
+                                       ++stats->n;
+                                       spin_unlock_irqrestore(&stats->lock, flags);
+                               }
+
                                callback = ent->callback;
                                context = ent->context;
                                err = ent->ret;
+                               if (!err)
+                                       err = mlx5_copy_from_msg(ent->uout,
+                                                                ent->out,
+                                                                ent->uout_size);
+
+                               mlx5_free_cmd_msg(dev, ent->out);
+                               free_msg(dev, ent->in);
+
                                free_cmd(ent);
                                callback(err, context);
                        } else {
@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
        return status ? -1 : 0; /* TBD more meaningful codes */
 }
 
-static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+                                     gfp_t gfp)
 {
        struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
        struct mlx5_cmd *cmd = &dev->cmd;
@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
                ent = &cmd->cache.med;
 
        if (ent) {
-               spin_lock(&ent->lock);
+               spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        msg = list_entry(ent->head.next, typeof(*msg), list);
                        /* For cached lists, we must explicitly state what is
@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
                        msg->len = in_size;
                        list_del(&msg->list);
                }
-               spin_unlock(&ent->lock);
+               spin_unlock_irq(&ent->lock);
        }
 
        if (IS_ERR(msg))
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
+               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
 
        return msg;
 }
 
-static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
-{
-       if (msg->cache) {
-               spin_lock(&msg->cache->lock);
-               list_add_tail(&msg->list, &msg->cache->head);
-               spin_unlock(&msg->cache->lock);
-       } else {
-               mlx5_free_cmd_msg(dev, msg);
-       }
-}
-
 static int is_manage_pages(struct mlx5_inbox_hdr *in)
 {
        return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
 }
 
-int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
-                 int out_size)
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+                   int out_size, mlx5_cmd_cbk_t callback, void *context)
 {
        struct mlx5_cmd_msg *inb;
        struct mlx5_cmd_msg *outb;
        int pages_queue;
+       gfp_t gfp;
        int err;
        u8 status = 0;
 
        pages_queue = is_manage_pages(in);
+       gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
 
-       inb = alloc_msg(dev, in_size);
+       inb = alloc_msg(dev, in_size, gfp);
        if (IS_ERR(inb)) {
                err = PTR_ERR(inb);
                return err;
@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                goto out_in;
        }
 
-       outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
+       outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
        if (IS_ERR(outb)) {
                err = PTR_ERR(outb);
                goto out_in;
        }
 
-       err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
+       err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+                             pages_queue, &status);
        if (err)
                goto out_out;
 
@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
        err = mlx5_copy_from_msg(out, outb, out_size);
 
 out_out:
-       mlx5_free_cmd_msg(dev, outb);
+       if (!callback)
+               mlx5_free_cmd_msg(dev, outb);
 
 out_in:
-       free_msg(dev, inb);
+       if (!callback)
+               free_msg(dev, inb);
        return err;
 }
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+                 int out_size)
+{
+       return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+}
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+                    void *out, int out_size, mlx5_cmd_cbk_t callback,
+                    void *context)
+{
+       return cmd_exec(dev, in, in_size, out, out_size, callback, context);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
+
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
index 9c7194b..80f6d12 100644 (file)
@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
                return 0;
 
        stats = filp->private_data;
-       spin_lock(&stats->lock);
+       spin_lock_irq(&stats->lock);
        if (stats->n)
                field = div64_u64(stats->sum, stats->n);
-       spin_unlock(&stats->lock);
+       spin_unlock_irq(&stats->lock);
        ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
        if (ret > 0) {
                if (copy_to_user(buf, tbuf, ret))
@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
        struct mlx5_cmd_stats *stats;
 
        stats = filp->private_data;
-       spin_lock(&stats->lock);
+       spin_lock_irq(&stats->lock);
        stats->sum = 0;
        stats->n = 0;
-       spin_unlock(&stats->lock);
+       spin_unlock_irq(&stats->lock);
 
        *pos += count;
 
index 2231d93..64a61b2 100644 (file)
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
        in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
        in->ctx.intr = vecidx;
-       in->ctx.log_page_size = PAGE_SHIFT - 12;
+       in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        in->events_mask = cpu_to_be64(mask);
 
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
index bc0f5fb..40a9f5e 100644 (file)
@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
        u8      rsvd[15];
 };
 
+
+#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
+
+enum {
+       MLX5_CAP_BITS_RW_MASK   = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+                                 CAP_MASK(MLX5_CAP_OFF_DCT, 1),
+};
+
+/* selectively copy writable fields clearing any reserved area
+ */
+static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
+{
+       u64 v64;
+
+       to->log_max_qp = from->log_max_qp & 0x1f;
+       to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
+       to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
+       to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
+       to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
+       to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
+       to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
+       v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
+       to->flags = cpu_to_be64(v64);
+}
+
+enum {
+       HCA_CAP_OPMOD_GET_MAX   = 0,
+       HCA_CAP_OPMOD_GET_CUR   = 1,
+};
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        }
 
        query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
-       query_ctx.hdr.opmod  = cpu_to_be16(0x1);
+       query_ctx.hdr.opmod  = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
        err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
                                 query_out, sizeof(*query_out));
        if (err)
@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
                goto query_ex;
        }
 
-       memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
-              sizeof(set_ctx->hca_cap));
+       copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
 
        if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
                set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
index 5b44e2e..35e514d 100644 (file)
 #include "mlx5_core.h"
 
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
-                         struct mlx5_create_mkey_mbox_in *in, int inlen)
+                         struct mlx5_create_mkey_mbox_in *in, int inlen,
+                         mlx5_cmd_cbk_t callback, void *context,
+                         struct mlx5_create_mkey_mbox_out *out)
 {
-       struct mlx5_create_mkey_mbox_out out;
+       struct mlx5_create_mkey_mbox_out lout;
        int err;
        u8 key;
 
-       memset(&out, 0, sizeof(out));
-       spin_lock(&dev->priv.mkey_lock);
+       memset(&lout, 0, sizeof(lout));
+       spin_lock_irq(&dev->priv.mkey_lock);
        key = dev->priv.mkey_key++;
-       spin_unlock(&dev->priv.mkey_lock);
+       spin_unlock_irq(&dev->priv.mkey_lock);
        in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
-       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       if (callback) {
+               err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
+                                      callback, context);
+               return err;
+       } else {
+               err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
+       }
+
        if (err) {
                mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
                return err;
        }
 
-       if (out.hdr.status) {
-               mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
-               return mlx5_cmd_status_to_err(&out.hdr);
+       if (lout.hdr.status) {
+               mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
+               return mlx5_cmd_status_to_err(&lout.hdr);
        }
 
-       mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
-       mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
+       mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+       mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+                     be32_to_cpu(lout.mkey), key, mr->key);
 
        return err;
 }
index 7b12acf..37b6ad1 100644 (file)
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
 };
 
 struct fw_page {
-       struct rb_node  rb_node;
-       u64             addr;
-       struct page     *page;
-       u16             func_id;
+       struct rb_node          rb_node;
+       u64                     addr;
+       struct page            *page;
+       u16                     func_id;
+       unsigned long           bitmask;
+       struct list_head        list;
+       unsigned                free_count;
 };
 
 struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
        MAX_RECLAIM_TIME_MSECS  = 5000,
 };
 
+enum {
+       MLX5_MAX_RECLAIM_TIME_MILI      = 5000,
+       MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / 4096,
+};
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
        struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
        struct rb_node *parent = NULL;
        struct fw_page *nfp;
        struct fw_page *tfp;
+       int i;
 
        while (*new) {
                parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
                        return -EEXIST;
        }
 
-       nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+       nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
        if (!nfp)
                return -ENOMEM;
 
        nfp->addr = addr;
        nfp->page = page;
        nfp->func_id = func_id;
+       nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+       for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+               set_bit(i, &nfp->bitmask);
 
        rb_link_node(&nfp->rb_node, parent, new);
        rb_insert_color(&nfp->rb_node, root);
+       list_add(&nfp->list, &dev->priv.free_list);
 
        return 0;
 }
 
-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 {
        struct rb_root *root = &dev->priv.page_root;
        struct rb_node *tmp = root->rb_node;
-       struct page *result = NULL;
+       struct fw_page *result = NULL;
        struct fw_page *tfp;
 
        while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
                } else if (tfp->addr > addr) {
                        tmp = tmp->rb_right;
                } else {
-                       rb_erase(&tfp->rb_node, root);
-                       result = tfp->page;
-                       kfree(tfp);
+                       result = tfp;
                        break;
                }
        }
@@ -176,12 +187,98 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
        return err;
 }
 
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
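+       /*
+        * Firmware pages are handed to the HCA in 4KB chunks.  Each host page
+        * is split into MLX5_NUM_4K_IN_PAGE chunks tracked by a bitmask; hand
+        * out one free chunk and drop fully used pages off the free list.
+        */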
+       struct fw_page *fp;
+       unsigned n;
+
+       if (list_empty(&dev->priv.free_list))
+               return -ENOMEM;
+
+       fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+       n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+       if (n >= MLX5_NUM_4K_IN_PAGE) {
+               mlx5_core_warn(dev, "alloc 4k bug\n");
+               return -ENOENT;
+       }
+       clear_bit(n, &fp->bitmask);
+       fp->free_count--;
+       if (!fp->free_count)
+               list_del(&fp->list);
+
+       *addr = fp->addr + n * 4096;
+
+       return 0;
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
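+       /*
+        * Return a 4KB chunk to its host page.  A page that regains a free
+        * chunk goes back on the free list; once every chunk is free the page
+        * is unmapped and released.
+        */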
+       struct fw_page *fwp;
+       int n;
+
+       fwp = find_fw_page(dev, addr & PAGE_MASK);
+       if (!fwp) {
+               mlx5_core_warn(dev, "page not found\n");
+               return;
+       }
+
+       n = (addr & ~PAGE_MASK) / 4096;
+       fwp->free_count++;
+       set_bit(n, &fwp->bitmask);
+       if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+               rb_erase(&fwp->rb_node, &dev->priv.page_root);
+               if (fwp->free_count != 1)
+                       list_del(&fwp->list);
+               dma_unmap_page(&dev->pdev->dev, fwp->addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+               __free_page(fwp->page);
+               kfree(fwp);
+       } else if (fwp->free_count == 1) {
+               list_add(&fwp->list, &dev->priv.free_list);
+       }
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+       struct page *page;
+       u64 addr;
+       int err;
+
+       page = alloc_page(GFP_HIGHUSER);
+       if (!page) {
+               mlx5_core_warn(dev, "failed to allocate page\n");
+               return -ENOMEM;
+       }
+       addr = dma_map_page(&dev->pdev->dev, page, 0,
+                           PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(&dev->pdev->dev, addr)) {
+               mlx5_core_warn(dev, "failed dma mapping page\n");
+               err = -ENOMEM;
+               goto out_alloc;
+       }
+       err = insert_page(dev, addr, page, func_id);
+       if (err) {
+               mlx5_core_err(dev, "failed to track allocated page\n");
+               goto out_mapping;
+       }
+
+       return 0;
+
+out_mapping:
+       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+out_alloc:
+       __free_page(page);
+
+       return err;
+}
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
                      int notify_fail)
 {
        struct mlx5_manage_pages_inbox *in;
        struct mlx5_manage_pages_outbox out;
-       struct page *page;
+       struct mlx5_manage_pages_inbox *nin;
        int inlen;
        u64 addr;
        int err;
@@ -196,27 +293,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
        memset(&out, 0, sizeof(out));
 
        for (i = 0; i < npages; i++) {
-               page = alloc_page(GFP_HIGHUSER);
-               if (!page) {
-                       err = -ENOMEM;
-                       mlx5_core_warn(dev, "failed to allocate page\n");
-                       goto out_alloc;
-               }
-               addr = dma_map_page(&dev->pdev->dev, page, 0,
-                                   PAGE_SIZE, DMA_BIDIRECTIONAL);
-               if (dma_mapping_error(&dev->pdev->dev, addr)) {
-                       mlx5_core_warn(dev, "failed dma mapping page\n");
-                       __free_page(page);
-                       err = -ENOMEM;
-                       goto out_alloc;
-               }
-               err = insert_page(dev, addr, page, func_id);
+retry:
+               err = alloc_4k(dev, &addr);
                if (err) {
-                       mlx5_core_err(dev, "failed to track allocated page\n");
-                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-                       __free_page(page);
-                       err = -ENOMEM;
-                       goto out_alloc;
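+                       /* no free 4K chunk left: map a fresh host page and retry */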
+                       if (err == -ENOMEM)
+                               err = alloc_system_page(dev, func_id);
+                       if (err)
+                               goto out_4k;
+
+                       goto retry;
                }
                in->pas[i] = cpu_to_be64(addr);
        }
@@ -226,7 +311,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
        in->func_id = cpu_to_be16(func_id);
        in->num_entries = cpu_to_be32(npages);
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-       mlx5_core_dbg(dev, "err %d\n", err);
        if (err) {
                mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
                goto out_alloc;
@@ -247,25 +331,22 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 
 out_alloc:
        if (notify_fail) {
-               memset(in, 0, inlen);
-               memset(&out, 0, sizeof(out));
-               in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-               in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
-               if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
-                       mlx5_core_warn(dev, "\n");
-       }
-       for (i--; i >= 0; i--) {
-               addr = be64_to_cpu(in->pas[i]);
-               page = remove_page(dev, addr);
-               if (!page) {
-                       mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
-                                     addr);
-                       continue;
+               nin = kzalloc(sizeof(*nin), GFP_KERNEL);
+               if (!nin) {
+                       mlx5_core_warn(dev, "allocation failed\n");
+                       goto out_4k;
                }
-               dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-               __free_page(page);
+               memset(&out, 0, sizeof(out));
+               nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+               nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+               if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
+                       mlx5_core_warn(dev, "page notify failed\n");
+               kfree(nin);
        }
 
+out_4k:
+       for (i--; i >= 0; i--)
+               free_4k(dev, be64_to_cpu(in->pas[i]));
 out_free:
        mlx5_vfree(in);
        return err;
@@ -276,7 +357,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 {
        struct mlx5_manage_pages_inbox   in;
        struct mlx5_manage_pages_outbox *out;
-       struct page *page;
        int num_claimed;
        int outlen;
        u64 addr;
@@ -315,13 +395,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 
        for (i = 0; i < num_claimed; i++) {
                addr = be64_to_cpu(out->pas[i]);
-               page = remove_page(dev, addr);
-               if (!page) {
-                       mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
-               } else {
-                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-                       __free_page(page);
-               }
+               free_4k(dev, addr);
        }
 
 out_free:
@@ -381,14 +455,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
        return give_pages(dev, func_id, npages, 0);
 }
 
+enum {
+       MLX5_BLKS_FOR_RECLAIM_PAGES = 12
+};
+
 static int optimal_reclaimed_pages(void)
 {
        struct mlx5_cmd_prot_block *block;
        struct mlx5_cmd_layout *lay;
        int ret;
 
-       ret = (sizeof(lay->in) + sizeof(block->data) -
-              sizeof(struct mlx5_manage_pages_outbox)) / 8;
+       ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
+              sizeof(struct mlx5_manage_pages_outbox)) /
+              FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
 
        return ret;
 }
@@ -427,6 +506,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
        dev->priv.page_root = RB_ROOT;
+       INIT_LIST_HEAD(&dev->priv.free_list);
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
index 5eb4e31..da78875 100644 (file)
@@ -230,6 +230,15 @@ enum {
        MLX5_MAX_PAGE_SHIFT             = 31
 };
 
+enum {
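+       /* the device firmware always works in 4KB pages, regardless of the host PAGE_SHIFT */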
+       MLX5_ADAPTER_PAGE_SHIFT         = 12
+};
+
+enum {
+       MLX5_CAP_OFF_DCT                = 41,
+       MLX5_CAP_OFF_CMDIF_CSUM         = 46,
+};
+
 struct mlx5_inbox_hdr {
        __be16          opcode;
        u8              rsvd[4];
@@ -319,9 +328,9 @@ struct mlx5_hca_cap {
        u8      rsvd25[42];
        __be16  log_uar_page_sz;
        u8      rsvd26[28];
-       u8      log_msx_atomic_size_qp;
+       u8      log_max_atomic_size_qp;
        u8      rsvd27[2];
-       u8      log_msx_atomic_size_dc;
+       u8      log_max_atomic_size_dc;
        u8      rsvd28[76];
 };
 
index 6b8c496..554548c 100644 (file)
@@ -483,6 +483,7 @@ struct mlx5_priv {
        struct rb_root          page_root;
        int                     fw_pages;
        int                     reg_pages;
+       struct list_head        free_list;
 
        struct mlx5_core_health health;
 
@@ -557,9 +558,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 struct mlx5_cmd_work_ent {
        struct mlx5_cmd_msg    *in;
        struct mlx5_cmd_msg    *out;
+       void                   *uout;
+       int                     uout_size;
        mlx5_cmd_cbk_t          callback;
        void                   *context;
-       int idx;
+       int                     idx;
        struct completion       done;
        struct mlx5_cmd        *cmd;
        struct work_struct      work;
@@ -570,6 +573,7 @@ struct mlx5_cmd_work_ent {
        u8                      token;
        struct timespec         ts1;
        struct timespec         ts2;
+       u16                     op;
 };
 
 struct mlx5_pas {
@@ -653,6 +657,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                  int out_size);
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+                    void *out, int out_size, mlx5_cmd_cbk_t callback,
+                    void *context);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -676,7 +683,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                      u16 lwm, int is_srq);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
-                         struct mlx5_create_mkey_mbox_in *in, int inlen);
+                         struct mlx5_create_mkey_mbox_in *in, int inlen,
+                         mlx5_cmd_cbk_t callback, void *context,
+                         struct mlx5_create_mkey_mbox_out *out);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                         struct mlx5_query_mkey_mbox_out *out, int outlen);
@@ -745,6 +754,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
        return mkey_idx << 8;
 }
 
+static inline u8 mlx5_mkey_variant(u32 mkey)
+{
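+       /* the low byte of an mkey is the driver-assigned variant part */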
+       return mkey & 0xff;
+}
+
 enum {
        MLX5_PROF_MASK_QP_SIZE          = (u64)1 << 0,
        MLX5_PROF_MASK_MR_CACHE         = (u64)1 << 1,
index e393171..979874c 100644 (file)
@@ -67,12 +67,14 @@ enum rdma_node_type {
        RDMA_NODE_IB_CA         = 1,
        RDMA_NODE_IB_SWITCH,
        RDMA_NODE_IB_ROUTER,
-       RDMA_NODE_RNIC
+       RDMA_NODE_RNIC,
+       RDMA_NODE_USNIC,
 };
 
 enum rdma_transport_type {
        RDMA_TRANSPORT_IB,
-       RDMA_TRANSPORT_IWARP
+       RDMA_TRANSPORT_IWARP,
+       RDMA_TRANSPORT_USNIC
 };
 
 enum rdma_transport_type
@@ -1436,6 +1438,7 @@ struct ib_device {
 
        int                          uverbs_abi_ver;
        u64                          uverbs_cmd_mask;
+       u64                          uverbs_ex_cmd_mask;
 
        char                         node_desc[64];
        __be64                       node_guid;
@@ -2384,4 +2387,17 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
                               struct ib_flow_attr *flow_attr, int domain);
 int ib_destroy_flow(struct ib_flow *flow_id);
 
+static inline int ib_check_mr_access(int flags)
+{
+       /*
+        * Local write permission is required if remote write or
+        * remote atomic permission is also requested.
+        */
+       if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+           !(flags & IB_ACCESS_LOCAL_WRITE))
+               return -EINVAL;
+
+       return 0;
+}
+
 #endif /* IB_VERBS_H */
index e3ddd86..cbfdd4c 100644 (file)
@@ -87,10 +87,11 @@ enum {
        IB_USER_VERBS_CMD_CLOSE_XRCD,
        IB_USER_VERBS_CMD_CREATE_XSRQ,
        IB_USER_VERBS_CMD_OPEN_QP,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-       IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-       IB_USER_VERBS_CMD_DESTROY_FLOW
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+enum {
+       IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+       IB_USER_VERBS_EX_CMD_DESTROY_FLOW
 };
 
 /*
@@ -122,22 +123,24 @@ struct ib_uverbs_comp_event_desc {
  * the rest of the command struct based on these values.
  */
 
+#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
+#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
+#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
+
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
+
 struct ib_uverbs_cmd_hdr {
        __u32 command;
        __u16 in_words;
        __u16 out_words;
 };
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-struct ib_uverbs_cmd_hdr_ex {
-       __u32 command;
-       __u16 in_words;
-       __u16 out_words;
+struct ib_uverbs_ex_cmd_hdr {
+       __u64 response;
        __u16 provider_in_words;
        __u16 provider_out_words;
        __u32 cmd_hdr_reserved;
 };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
 
 struct ib_uverbs_get_context {
        __u64 response;
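
With the experimental Kconfig gate gone, extended commands are signalled in-band: userspace sets IB_USER_VERBS_CMD_FLAG_EXTENDED in the flags byte of the legacy header's command word and follows that header with an ib_uverbs_ex_cmd_hdr carrying the response pointer and provider word counts. A hedged userspace sketch of issuing an extended CREATE_FLOW over the uverbs char device; the helper names are illustrative (not a libibverbs API), and the 8-byte word-count convention is taken from the new parsing in uverbs_main.c:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <rdma/ib_user_verbs.h>

/* "flow_buf" already holds an ib_uverbs_create_flow followed by its flow
 * specs (flow_len bytes, a multiple of 8); "cmd_fd" is the open uverbs fd. */
static int create_flow_ex(int cmd_fd, const void *flow_buf, size_t flow_len,
			  struct ib_uverbs_create_flow_resp *resp)
{
	struct {
		struct ib_uverbs_cmd_hdr    hdr;
		struct ib_uverbs_ex_cmd_hdr ex_hdr;
		uint8_t                     payload[256];
	} req;
	size_t total = sizeof(req.hdr) + sizeof(req.ex_hdr) + flow_len;

	if (flow_len > sizeof(req.payload))
		return -1;
	memset(&req, 0, sizeof(req.hdr) + sizeof(req.ex_hdr));

	/* Command number plus the "extended" marker in the flags byte. */
	req.hdr.command = IB_USER_VERBS_EX_CMD_CREATE_FLOW |
			  ((uint32_t)IB_USER_VERBS_CMD_FLAG_EXTENDED <<
			   IB_USER_VERBS_CMD_FLAGS_SHIFT);
	/* For extended commands the word counts are in 8-byte units. */
	req.hdr.in_words  = flow_len / 8;
	req.hdr.out_words = sizeof(*resp) / 8;

	/* The response buffer travels in the extended header, not the payload. */
	req.ex_hdr.response = (uintptr_t)resp;
	req.ex_hdr.provider_in_words  = 0;	/* no driver-specific tail */
	req.ex_hdr.provider_out_words = 0;

	memcpy(req.payload, flow_buf, flow_len);

	return write(cmd_fd, &req, total) == (ssize_t)total ? 0 : -1;
}
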
@@ -700,62 +703,71 @@ struct ib_uverbs_detach_mcast {
        __u64 driver_data[0];
 };
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-struct ib_kern_eth_filter {
+struct ib_uverbs_flow_spec_hdr {
+       __u32 type;
+       __u16 size;
+       __u16 reserved;
+       /* followed by flow_spec */
+       __u64 flow_spec_data[0];
+};
+
+struct ib_uverbs_flow_eth_filter {
        __u8  dst_mac[6];
        __u8  src_mac[6];
        __be16 ether_type;
        __be16 vlan_tag;
 };
 
-struct ib_kern_spec_eth {
-       __u32  type;
-       __u16  size;
-       __u16  reserved;
-       struct ib_kern_eth_filter val;
-       struct ib_kern_eth_filter mask;
+struct ib_uverbs_flow_spec_eth {
+       union {
+               struct ib_uverbs_flow_spec_hdr hdr;
+               struct {
+                       __u32 type;
+                       __u16 size;
+                       __u16 reserved;
+               };
+       };
+       struct ib_uverbs_flow_eth_filter val;
+       struct ib_uverbs_flow_eth_filter mask;
 };
 
-struct ib_kern_ipv4_filter {
+struct ib_uverbs_flow_ipv4_filter {
        __be32 src_ip;
        __be32 dst_ip;
 };
 
-struct ib_kern_spec_ipv4 {
-       __u32  type;
-       __u16  size;
-       __u16  reserved;
-       struct ib_kern_ipv4_filter val;
-       struct ib_kern_ipv4_filter mask;
+struct ib_uverbs_flow_spec_ipv4 {
+       union {
+               struct ib_uverbs_flow_spec_hdr hdr;
+               struct {
+                       __u32 type;
+                       __u16 size;
+                       __u16 reserved;
+               };
+       };
+       struct ib_uverbs_flow_ipv4_filter val;
+       struct ib_uverbs_flow_ipv4_filter mask;
 };
 
-struct ib_kern_tcp_udp_filter {
+struct ib_uverbs_flow_tcp_udp_filter {
        __be16 dst_port;
        __be16 src_port;
 };
 
-struct ib_kern_spec_tcp_udp {
-       __u32  type;
-       __u16  size;
-       __u16  reserved;
-       struct ib_kern_tcp_udp_filter val;
-       struct ib_kern_tcp_udp_filter mask;
-};
-
-struct ib_kern_spec {
+struct ib_uverbs_flow_spec_tcp_udp {
        union {
+               struct ib_uverbs_flow_spec_hdr hdr;
                struct {
                        __u32 type;
                        __u16 size;
                        __u16 reserved;
                };
-               struct ib_kern_spec_eth     eth;
-               struct ib_kern_spec_ipv4    ipv4;
-               struct ib_kern_spec_tcp_udp tcp_udp;
        };
+       struct ib_uverbs_flow_tcp_udp_filter val;
+       struct ib_uverbs_flow_tcp_udp_filter mask;
 };
 
-struct ib_kern_flow_attr {
+struct ib_uverbs_flow_attr {
        __u32 type;
        __u16 size;
        __u16 priority;
@@ -767,13 +779,13 @@ struct ib_kern_flow_attr {
         * struct ib_flow_spec_xxx
         * struct ib_flow_spec_yyy
         */
+       struct ib_uverbs_flow_spec_hdr flow_specs[0];
 };
 
 struct ib_uverbs_create_flow  {
        __u32 comp_mask;
-       __u64 response;
        __u32 qp_handle;
-       struct ib_kern_flow_attr flow_attr;
+       struct ib_uverbs_flow_attr flow_attr;
 };
 
 struct ib_uverbs_create_flow_resp {
@@ -785,7 +797,6 @@ struct ib_uverbs_destroy_flow  {
        __u32 comp_mask;
        __u32 flow_handle;
 };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
 
 struct ib_uverbs_create_srq {
        __u64 response;
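
The payload behind the extended CREATE_FLOW command is an ib_uverbs_create_flow whose flow_attr is followed by num_of_specs variable-size specs, each self-describing through the type/size words of its ib_uverbs_flow_spec_hdr. Below is a hedged sketch that builds a request steering one TCP destination port on one destination MAC. The fields not visible in the hunk above (num_of_specs, port) and the numeric type values (0x20 Ethernet, 0x40 TCP) are assumptions mirroring the kernel-side definitions, as is the convention that flow_attr.size counts only the trailing specs:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <rdma/ib_user_verbs.h>

/* Hypothetical container: attr and its two specs packed back to back
 * (all three structs are multiples of 8 bytes, so there is no padding). */
struct flow_req {
	struct ib_uverbs_create_flow       cmd;
	struct ib_uverbs_flow_spec_eth     eth;
	struct ib_uverbs_flow_spec_tcp_udp tcp;
};

static void build_flow_req(struct flow_req *req, uint32_t qp_handle,
			   const uint8_t dst_mac[6], uint16_t dst_port)
{
	memset(req, 0, sizeof(*req));

	req->cmd.qp_handle              = qp_handle;
	req->cmd.flow_attr.type         = 0;	/* assumed: IB_FLOW_ATTR_NORMAL */
	req->cmd.flow_attr.port         = 1;
	req->cmd.flow_attr.num_of_specs = 2;
	/* Bytes of specs that trail the attr, per the kernel handler. */
	req->cmd.flow_attr.size         = sizeof(req->eth) + sizeof(req->tcp);

	req->eth.type = 0x20;			/* assumed: Ethernet spec */
	req->eth.size = sizeof(req->eth);
	memcpy(req->eth.val.dst_mac, dst_mac, 6);
	memset(req->eth.mask.dst_mac, 0xff, 6);	/* exact match on dst MAC */

	req->tcp.type = 0x40;			/* assumed: TCP spec */
	req->tcp.size = sizeof(req->tcp);
	req->tcp.val.dst_port  = htons(dst_port);
	req->tcp.mask.dst_port = 0xffff;	/* exact match on dst port */
}

The resulting 80-byte request would then be handed, as flow_buf/flow_len, to something like the create_flow_ex() sketch shown earlier.
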