mlx4: Implement IP based gids support for RoCE/SRIOV
[cascardo/linux.git] / drivers / infiniband / hw / mlx4 / mad.c
index f2a3f48..2c572ae 100644 (file)
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
        int ret = 0;
        u16 tun_pkey_ix;
        u16 cached_pkey;
+       u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
        if (dest_qpt > IB_QPT_GSI)
                return -EINVAL;
@@ -509,6 +510,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
         * The driver will set the force loopback bit in post_send */
        memset(&attr, 0, sizeof attr);
        attr.port_num = port;
+       if (is_eth) {
+               memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+               attr.ah_flags = IB_AH_GRH;
+       }
        ah = ib_create_ah(tun_ctx->pd, &attr);
        if (IS_ERR(ah))
                return -ENOMEM;
@@ -540,11 +545,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 
        /* adjust tunnel data */
        tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
-       tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
-       tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
        tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
        tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
 
+       if (is_eth) {
+               u16 vlan = 0;
+               if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+                                               NULL)) {
+                       /* VST mode */
+                       if (vlan != wc->vlan_id)
+                               /* Packet vlan is not the VST-assigned vlan.
+                                * Drop the packet.
+                                */
+                               goto out;
+                        else
+                               /* Remove the vlan tag before forwarding
+                                * the packet to the VF.
+                                */
+                               vlan = 0xffff;
+               } else {
+                       vlan = wc->vlan_id;
+               }
+
+               tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+               memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+               memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+       } else {
+               tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+               tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+       }
+
        ib_dma_sync_single_for_device(&dev->ib_dev,
                                      tun_qp->tx_ring[tun_tx_ix].buf.map,
                                      sizeof (struct mlx4_rcv_tunnel_mad),
@@ -580,6 +610,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
        int err;
        int slave;
        u8 *slave_id;
+       int is_eth = 0;
+
+       if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+               is_eth = 0;
+       else
+               is_eth = 1;
+
+       if (is_eth) {
+               if (!(wc->wc_flags & IB_WC_GRH)) {
+                       mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+                       return -EINVAL;
+               }
+               if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+                       mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+                       return -EINVAL;
+               }
+               if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+                       mlx4_ib_warn(ibdev, "failed matching grh\n");
+                       return -ENOENT;
+               }
+               if (slave >= dev->dev->caps.sqp_demux) {
+                       mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+                                    slave, dev->dev->caps.sqp_demux);
+                       return -ENOENT;
+               }
+
+               if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+                       return 0;
+
+               err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+               if (err)
+                       pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+                                slave, err);
+               return 0;
+       }
 
        /* Initially assume that this mad is for us */
        slave = mlx4_master_func_num(dev->dev);
@@ -1076,8 +1141,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
 
 
 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
-                        enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
-                        u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+                        enum ib_qp_type dest_qpt, u16 pkey_index,
+                        u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+                        u8 *s_mac, struct ib_mad *mad)
 {
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
@@ -1166,6 +1232,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
+       if (s_mac)
+               memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
 
        ret = ib_post_send(send_qp, &wr, &bad_wr);
 out:
@@ -1174,6 +1243,34 @@ out:
        return ret;
 }
 
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+       /* RoCE GID indices left once MLX4_ROCE_PF_GIDS of them are set aside */
+       const int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+       int num_vfs, per_vf, extra;
+
+       /* base index is the slave number on IB ports, and 0 for slave 0 */
+       if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+               return slave;
+       if (!slave)
+               return 0;
+       num_vfs = dev->dev->num_vfs;
+       per_vf = vf_gids / num_vfs;
+       extra = vf_gids % num_vfs;
+       /* slaves 1..extra sit per_vf + 1 apart, later ones per_vf apart */
+       return MLX4_ROCE_PF_GIDS + (slave <= extra ? (per_vf + 1) * (slave - 1) :
+                                   extra + per_vf * (slave - 1));
+}
+
+static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+                                   struct ib_ah_attr *ah_attr)
+{
+       /* IB ports use the slave number; others are offset by the slave's base */
+       if (rdma_port_get_link_layer(&dev->ib_dev, port) != IB_LINK_LAYER_INFINIBAND)
+               slave = ah_attr->grh.sgid_index + get_slave_base_gid_ix(dev, slave, port);
+       ah_attr->grh.sgid_index = slave;
+}
+
 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
 {
        struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1260,12 +1357,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
        memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
        ah.ibah.device = ctx->ib_dev;
        mlx4_ib_query_ah(&ah.ibah, &ah_attr);
-       if ((ah_attr.ah_flags & IB_AH_GRH) &&
-           (ah_attr.grh.sgid_index != slave)) {
-               mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
-                            slave, ah_attr.grh.sgid_index);
-               return;
-       }
+       if (ah_attr.ah_flags & IB_AH_GRH)
+               fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
+
+       memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+       ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+       /* if the slave has a default vlan, use it */
+       mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+                                   &ah_attr.vlan_id, &ah_attr.sl);
 
        mlx4_ib_send_to_wire(dev, slave, ctx->port,
                             is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1273,7 +1372,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
                             be16_to_cpu(tunnel->hdr.pkey_index),
                             be32_to_cpu(tunnel->hdr.remote_qpn),
                             be32_to_cpu(tunnel->hdr.qkey),
-                            &ah_attr, &tunnel->mad);
+                            &ah_attr, wc->smac, &tunnel->mad);
 }
 
 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,