net/mlx4_core: Enhance the catas flow to support device reset
[cascardo/linux.git] drivers/net/ethernet/mellanox/mlx4/main.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
                          "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of VFs to probe by PF driver (num_vfs > 0)\n"
                           "probe_vf=port1,port2,port1+2");

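/* num_vfs and probe_vf are triplets: entry 0 counts single-port VFs on
 * port 1, entry 1 single-port VFs on port 2, and entry 2 dual-port VFs;
 * e.g. num_vfs=5,0,2 requests five VFs on port 1 and two dual-port VFs.
 */
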
int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
                        mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log MGM entry size, which defines"
                                         " the number of QPs per MCG, for"
                                         " example: 10 gives 248. Range: 7 <="
                                         " log_num_mgm_entry_size <= 12."
                                         " To activate device managed"
                                         " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
                 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

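/* PF context-behaviour bits this driver understands; a slave aborts in
 * mlx4_slave_cap() below if QUERY_FUNC_CAP reports a bit outside this mask.
 */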
#define PF_CONTEXT_BEHAVIOUR_MASK       (MLX4_FUNC_CAP_64B_EQE_CQE | \
                                         MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
                                         MLX4_FUNC_CAP_DMFS_A0_STATIC)

static char mlx4_version[] =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";

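/* ICM resource profiles: default_profile is used normally, and
 * low_mem_profile on low-memory systems (see mlx4_low_memory_profile()
 * in mlx4_dev_cap() below).
 */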
static struct mlx4_profile default_profile = {
        .num_qp         = 1 << 18,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 19,
        .num_mtt        = 1 << 20, /* It is really num mtt segments */
};

static struct mlx4_profile low_mem_profile = {
        .num_qp         = 1 << 17,
        .num_srq        = 1 << 6,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 8,
        .num_mcg        = 1 << 8,
        .num_mpt        = 1 << 9,
        .num_mtt        = 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
                                "1 for IB, 2 for Ethernet");

struct mlx4_port_config {
        struct list_head list;
        enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
        struct pci_dev *pdev;
};

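/* Number of PFs currently going through probe; presumably sampled during
 * VF initialization (outside this hunk) so a VF can wait for its PF to
 * finish coming up first.
 */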
static atomic_t pf_loading = ATOMIC_INIT(0);

int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
{
        int i;

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
                for (i = 0; i < dev->caps.num_ports - 1; i++) {
                        if (port_type[i] != port_type[i + 1]) {
                                mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
                                return -EINVAL;
                        }
                }
        }

        for (i = 0; i < dev->caps.num_ports; i++) {
                if (!(port_type[i] & dev->caps.supported_type[i+1])) {
                        mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
                                 i + 1);
                        return -EINVAL;
                }
        }
        return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
        int i;

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
        MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

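/* Returns a negative errno on failure; on success the return value is a
 * bitmask of the MLX4_QUERY_FUNC_* flags above, indicating which dev_cap
 * fields were refreshed from QUERY_FUNC.
 */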
static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err = 0;
        struct mlx4_func func;

        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
                err = mlx4_QUERY_FUNC(dev, &func, 0);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC command failed, aborting\n");
                        return err;
                }
                dev_cap->max_eqs = func.max_eq;
                dev_cap->reserved_eqs = func.rsvd_eqs;
                dev_cap->reserved_uars = func.rsvd_uars;
                err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
        }
        return err;
}

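/* On CPUs with 128B/256B cache lines, prefer strided CQEs/EQEs carrying
 * 32B of real data over plain 64B entries: the 64B flags are cleared, the
 * stride flags kept, and a master also advertises
 * MLX4_FUNC_CAP_EQE_CQE_STRIDE to its slaves.  Striding requires that FW
 * support both 64B entries and the stride feature.
 */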
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
        struct mlx4_caps *dev_cap = &dev->caps;

        /* not supported by FW, or cancelled by user */
        if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
            !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
                return;

        /* Must have 64B CQE/EQE enabled by FW to use the bigger stride.
         * When FW has NCSI it may decide not to report 64B CQE/EQEs.
         */
        if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
            !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                return;
        }

        if (cache_line_size() == 128 || cache_line_size() == 256) {
                mlx4_dbg(dev, "Enabling CQE stride, cache line supported\n");
                /* Changing the real data inside CQE size to 32B */
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

                if (mlx4_is_master(dev))
                        dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
        } else {
                mlx4_dbg(dev, "Disabling CQE stride, cache line unsupported\n");
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        }
}

static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
                          struct mlx4_port_cap *port_cap)
{
        dev->caps.vl_cap[port]         = port_cap->max_vl;
        dev->caps.ib_mtu_cap[port]     = port_cap->ib_mtu;
        dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
        dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
        /* set gid and pkey table operating lengths by default
         * to non-sriov values
         */
        dev->caps.gid_table_len[port]  = port_cap->max_gids;
        dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
        dev->caps.port_width_cap[port] = port_cap->max_port_width;
        dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
        dev->caps.def_mac[port]        = port_cap->def_mac;
        dev->caps.supported_type[port] = port_cap->supported_port_types;
        dev->caps.suggested_type[port] = port_cap->suggested_type;
        dev->caps.default_sense[port]  = port_cap->default_sense;
        dev->caps.trans_type[port]     = port_cap->trans_type;
        dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
        dev->caps.wavelength[port]     = port_cap->wavelength;
        dev->caps.trans_code[port]     = port_cap->trans_code;

        return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
                         struct mlx4_port_cap *port_cap)
{
        int err = 0;

        err = mlx4_QUERY_PORT(dev, port, port_cap);

        if (err)
                mlx4_err(dev, "QUERY_PORT command failed.\n");

        return err;
}

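/* Queries device capabilities (QUERY_DEV_CAP plus per-port QUERY_PORT)
 * and translates them into dev->caps, applying module-parameter limits
 * and sanity checks against kernel PAGE_SIZE and the UAR BAR size.
 */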
#define MLX4_A0_STEERING_TABLE_SIZE     256
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                return err;
        }
        mlx4_dev_cap_dump(dev, dev_cap);

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev_cap->uar_size,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        dev->caps.num_sys_eqs        = dev_cap->num_sys_eqs;
        dev->phys_caps.num_phys_eqs  = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
                                       dev->caps.num_sys_eqs :
                                       MLX4_MAX_EQ_NUM;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
                if (err) {
                        mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
                        return err;
                }
        }

        dev->caps.uar_page_size      = PAGE_SIZE;
        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;

        /* The first 128 UARs are used for EQ doorbells */
        dev->caps.reserved_uars      = max_t(int, 128, dev_cap->reserved_uars);
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
        dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.flags2             = dev_cap->flags2;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

        /* Sense port is always allowed on supported ConnectX-1 and -2 devices */
        if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
                dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
        /* Don't do sense port on multifunction devices (for now at least) */
        if (mlx4_is_mfunc(dev))
                dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

        if (mlx4_low_memory_profile()) {
                dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
                dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
        } else {
                dev->caps.log_num_macs  = log_num_mac;
                dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
                if (dev->caps.supported_type[i]) {
                        /* if only ETH is supported - assign ETH */
                        if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        /* if only IB is supported, assign IB */
                        else if (dev->caps.supported_type[i] ==
                                 MLX4_PORT_TYPE_IB)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
                        else {
                                /* if IB and ETH are supported, we set the port
                                 * type according to user selection of port type;
                                 * if user selected none, take the FW hint */
                                if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
                                        dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
                                                MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
                                else
                                        dev->caps.port_type[i] = port_type_array[i - 1];
                        }
                }
                /*
                 * Link sensing is allowed on the port if 3 conditions are true:
                 * 1. Both protocols are supported on the port.
                 * 2. Different types are supported on the port.
                 * 3. FW declared that it supports link sensing.
                 */
                mlx4_priv(dev)->sense.sense_allowed[i] =
                        ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

                /*
                 * If the "default_sense" bit is set, we move the port to "AUTO" mode
                 * and perform the sense_port FW command to try and set the correct
                 * port type from the beginning.
                 */
                if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
                        enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
                        dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
                        mlx4_SENSE_PORT(dev, i, &sensed_port);
                        if (sensed_port != MLX4_PORT_TYPE_NONE)
                                dev->caps.port_type[i] = sensed_port;
                } else {
                        dev->caps.possible_type[i] = dev->caps.port_type[i];
                }

                if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
                        dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
                        mlx4_warn(dev, "Requested number of MACs is too high for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
                        dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
                        mlx4_warn(dev, "Requested number of VLANs is too high for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

        dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
                dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
        else
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

        if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
                dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
                dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
        } else {
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
                dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
        }

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
                dev->caps.dmfs_high_rate_qpn_range;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

        if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
                if (dev_cap->flags &
                    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
                        mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
                }

                if (dev_cap->flags2 &
                    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
                     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
                        mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                }
        }

        if ((dev->caps.flags &
            (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
            mlx4_is_master(dev))
                dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

        if (!mlx4_is_slave(dev)) {
                mlx4_enable_cqe_eqe_stride(dev);
                dev->caps.alloc_res_qp_mask =
                        (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
                        MLX4_RESERVE_A0_QP;
        } else {
                dev->caps.alloc_res_qp_mask = 0;
        }

        return 0;
}

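/* Reads the device's maximum supported PCIe speed and width from the
 * Link Capabilities registers; LNKCAP2 is used when present (PCIe r3.0+),
 * otherwise the speed is decoded from the legacy LNKCAP bits.
 */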
static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
                                       enum pci_bus_speed *speed,
                                       enum pcie_link_width *width)
{
        u32 lnkcap1, lnkcap2;
        int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4   /* start of MLW mask in link capabilities */

        *speed = PCI_SPEED_UNKNOWN;
        *width = PCIE_LNK_WIDTH_UNKNOWN;

        err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
                                          &lnkcap1);
        err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
                                          &lnkcap2);
        if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
                        *speed = PCIE_SPEED_8_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
                        *speed = PCIE_SPEED_5_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
                        *speed = PCIE_SPEED_2_5GT;
        }
        if (!err1) {
                *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
                if (!lnkcap2) { /* pre-r3.0 */
                        if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
                                *speed = PCIE_SPEED_5_0GT;
                        else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
                                *speed = PCIE_SPEED_2_5GT;
                }
        }

        if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
                return err1 ? err1 :
                        err2 ? err2 : -EINVAL;
        }
        return 0;
}

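/* Warns (but does not fail) when the trained link delivers less bandwidth
 * than the device is capable of, e.g. a Gen3 card sitting in a Gen2 slot.
 */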
static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
        enum pcie_link_width width, width_cap;
        enum pci_bus_speed speed, speed_cap;
        int err;

#define PCIE_SPEED_STR(speed) \
        (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
         speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
         speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
         "Unknown")

        err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
        if (err) {
                mlx4_warn(dev,
                          "Unable to determine PCIe device BW capabilities\n");
                return;
        }

        err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
        if (err || speed == PCI_SPEED_UNKNOWN ||
            width == PCIE_LNK_WIDTH_UNKNOWN) {
                mlx4_warn(dev,
                          "Unable to determine PCI device chain minimum BW\n");
                return;
        }

        if (width != width_cap || speed != speed_cap)
                mlx4_warn(dev,
                          "PCIe BW is different than device's capability\n");

        mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
                  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
        mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
                  width, width_cap);
}

/* Returns the number of VFs that are still live (active and not reset) */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_state;
        int i;
        int ret = 0;

        for (i = 1 /* the PPF is slave 0 */; i < dev->num_slaves; ++i) {
                s_state = &priv->mfunc.master.slave_state[i];
                if (s_state->active && s_state->last_cmd !=
                    MLX4_COMM_CMD_RESET) {
                        mlx4_warn(dev, "%s: slave: %d is still active\n",
                                  __func__, i);
                        ret++;
                }
        }
        return ret;
}

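/* Paravirtualized special QPs use well-known qkeys carved out of
 * MLX4_RESERVED_QKEY_BASE: tunnel QPs map to base + (qpn -
 * base_tunnel_sqpn) and proxy QPs to base + (qpn - base_proxy_sqpn).
 */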
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
        u32 qk = MLX4_RESERVED_QKEY_BASE;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
            qpn < dev->phys_caps.base_proxy_sqpn)
                return -EINVAL;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn)
                /* tunnel qp */
                qk += qpn - dev->phys_caps.base_tunnel_sqpn;
        else
                qk += qpn - dev->phys_caps.base_proxy_sqpn;
        *qkey = qk;
        return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return 0;

        return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_slave;

        if (!mlx4_is_master(dev))
                return 0;

        s_slave = &priv->mfunc.master.slave_state[slave];
        return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

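/* A slave inherits its steering mode from the PF (via QUERY_HCA).  For
 * non-DMFS (B0) steering, an MGM entry of 2^log_mc_entry_sz bytes holds a
 * 32-byte header followed by 4-byte QPNs, hence 4 * (entry_size/16 - 2)
 * QPs per MCG; log 10 gives 4 * (64 - 2) = 248, matching the
 * log_num_mgm_entry_size parameter description above.
 */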
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
                                       struct mlx4_dev_cap *dev_cap,
                                       struct mlx4_init_hca_param *hca_param)
{
        dev->caps.steering_mode = hca_param->steering_mode;
        if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
                dev->caps.fs_log_max_ucast_qp_range_size =
                        dev_cap->fs_log_max_ucast_qp_range_size;
        } else
                dev->caps.num_qp_per_mgm =
                        4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

        mlx4_dbg(dev, "Steering mode is: %s\n",
                 mlx4_steering_mode_str(dev->caps.steering_mode));
}

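/* Slave (VF) capability discovery: rather than trusting QUERY_DEV_CAP
 * alone, a slave reads the PF-paravirtualized view via QUERY_HCA and
 * QUERY_FUNC_CAP (global, then per port) and derives its quotas and
 * proxy/tunnel QP numbers from it.
 */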
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
        int                        err;
        u32                        page_size;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_func_cap       func_cap;
        struct mlx4_init_hca_param hca_param;
        u8                         i;

        memset(&hca_param, 0, sizeof(hca_param));
        err = mlx4_QUERY_HCA(dev, &hca_param);
        if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
                return err;
        }

        /* fail if the hca has an unknown global capability;
         * at this time, global_caps should always be zero
         */
        if (hca_param.global_caps) {
                mlx4_err(dev, "Unknown hca global capabilities\n");
                return -ENOSYS;
        }

        mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

        dev->caps.hca_core_clock = hca_param.hca_core_clock;

        memset(&dev_cap, 0, sizeof(dev_cap));
        dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                return err;
        }

        err = mlx4_QUERY_FW(dev);
        if (err)
                mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

        page_size = ~dev->caps.page_size_cap + 1;
        mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
        if (page_size > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         page_size, PAGE_SIZE);
                return -ENODEV;
        }

        /* slave gets uar page size from QUERY_HCA fw command */
        dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

        /* TODO: relax this assumption */
        if (dev->caps.uar_page_size != PAGE_SIZE) {
                mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
                         dev->caps.uar_page_size, PAGE_SIZE);
                return -ENODEV;
        }

        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
        if (err) {
                mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
                         err);
                return err;
        }

        if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
            PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
                         func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
                return -ENOSYS;
        }

        dev->caps.num_ports             = func_cap.num_ports;
        dev->quotas.qp                  = func_cap.qp_quota;
        dev->quotas.srq                 = func_cap.srq_quota;
        dev->quotas.cq                  = func_cap.cq_quota;
        dev->quotas.mpt                 = func_cap.mpt_quota;
        dev->quotas.mtt                 = func_cap.mtt_quota;
        dev->caps.num_qps               = 1 << hca_param.log_num_qps;
        dev->caps.num_srqs              = 1 << hca_param.log_num_srqs;
        dev->caps.num_cqs               = 1 << hca_param.log_num_cqs;
        dev->caps.num_mpts              = 1 << hca_param.log_mpt_sz;
        dev->caps.num_eqs               = func_cap.max_eq;
        dev->caps.reserved_eqs          = func_cap.reserved_eq;
        dev->caps.num_pds               = MLX4_NUM_PDS;
        dev->caps.num_mgms              = 0;
        dev->caps.num_amgms             = 0;

        if (dev->caps.num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev->caps.num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);

        if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
            !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
            !dev->caps.qp0_qkey) {
                err = -ENOMEM;
                goto err_mem;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
                                 i, err);
                        goto err_mem;
                }
                dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
                dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
                dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
                dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
                dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
                dev->caps.port_mask[i] = dev->caps.port_type[i];
                dev->caps.phys_port_id[i] = func_cap.phys_port_id;
                if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
                                                    &dev->caps.gid_table_len[i],
                                                    &dev->caps.pkey_table_len[i]))
                        goto err_mem;
        }

        if (dev->caps.uar_page_size * (dev->caps.num_uars -
                                       dev->caps.reserved_uars) >
                                       pci_resource_len(dev->persist->pdev,
                                                        2)) {
                mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev->caps.uar_page_size * dev->caps.num_uars,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                goto err_mem;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;
        } else {
                dev->caps.eqe_size   = 32;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
                dev->caps.cqe_size   = 64;
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        } else {
                dev->caps.cqe_size   = 32;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
                dev->caps.eqe_size = hca_param.eqe_size;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
                dev->caps.cqe_size = hca_param.cqe_size;
                /* User still needs to know when CQE > 32B */
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        }

        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
        mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

        slave_adjust_steering_mode(dev, &dev_cap, &hca_param);

        if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
            dev->caps.bf_reg_size)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

        if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

        return 0;

err_mem:
        kfree(dev->caps.qp0_qkey);
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
        kfree(dev->caps.qp1_proxy);
        dev->caps.qp0_qkey = NULL;
        dev->caps.qp0_tunnel = NULL;
        dev->caps.qp0_proxy = NULL;
        dev->caps.qp1_tunnel = NULL;
        dev->caps.qp1_proxy = NULL;

        return err;
}

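/* Asynchronously loads the protocol drivers matching the configured port
 * types; mlx4_ib is also requested on RoCE-capable devices (IBOE flag)
 * even when all ports are Ethernet.
 */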
static void mlx4_request_modules(struct mlx4_dev *dev)
{
        int port;
        int has_ib_port = false;
        int has_eth_port = false;
#define EN_DRV_NAME     "mlx4_en"
#define IB_DRV_NAME     "mlx4_ib"

        for (port = 1; port <= dev->caps.num_ports; port++) {
                if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
                        has_ib_port = true;
                else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
                        has_eth_port = true;
        }

        if (has_eth_port)
                request_module_nowait(EN_DRV_NAME);
        if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
                request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_types)
{
        int err = 0;
        int change = 0;
        int port;

        for (port = 0; port < dev->caps.num_ports; port++) {
                /* Change the port type only if the new type is different
                 * from the current, and not set to Auto */
                if (port_types[port] != dev->caps.port_type[port + 1])
                        change = 1;
        }
        if (change) {
                mlx4_unregister_device(dev);
                for (port = 1; port <= dev->caps.num_ports; port++) {
                        mlx4_CLOSE_PORT(dev, port);
                        dev->caps.port_type[port] = port_types[port - 1];
                        err = mlx4_SET_PORT(dev, port, -1);
                        if (err) {
                                mlx4_err(dev, "Failed to set port %d, aborting\n",
                                         port);
                                goto out;
                        }
                }
                mlx4_set_port_mask(dev);
                err = mlx4_register_device(dev);
                if (err) {
                        mlx4_err(dev, "Failed to register device\n");
                        goto out;
                }
                mlx4_request_modules(dev);
        }

out:
        return err;
}

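/* sysfs hooks for the per-port "port type" attribute: reads report
 * "ib"/"eth" (or "auto (...)"); writes accept "ib", "eth" or "auto" and
 * trigger a full port reconfiguration via mlx4_change_port_types().
 */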
static ssize_t show_port_type(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        char type[8];

        sprintf(type, "%s",
                (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
                "ib" : "eth");
        if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
                sprintf(buf, "auto (%s)\n", type);
        else
                sprintf(buf, "%s\n", type);

        return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        enum mlx4_port_type types[MLX4_MAX_PORTS];
        enum mlx4_port_type new_types[MLX4_MAX_PORTS];
        static DEFINE_MUTEX(set_port_type_mutex);
        int i;
        int err = 0;

        mutex_lock(&set_port_type_mutex);

        if (!strcmp(buf, "ib\n"))
                info->tmp_type = MLX4_PORT_TYPE_IB;
        else if (!strcmp(buf, "eth\n"))
                info->tmp_type = MLX4_PORT_TYPE_ETH;
        else if (!strcmp(buf, "auto\n"))
                info->tmp_type = MLX4_PORT_TYPE_AUTO;
        else {
                mlx4_err(mdev, "%s is not a supported port type\n", buf);
                err = -EINVAL;
                goto err_out;
        }

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        /* Possible type is always the one that was delivered */
        mdev->caps.possible_type[info->port] = info->tmp_type;

        for (i = 0; i < mdev->caps.num_ports; i++) {
                types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
                                        mdev->caps.possible_type[i+1];
                if (types[i] == MLX4_PORT_TYPE_AUTO)
                        types[i] = mdev->caps.port_type[i+1];
        }

        if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
            !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
                for (i = 1; i <= mdev->caps.num_ports; i++) {
                        if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
                                mdev->caps.possible_type[i] = mdev->caps.port_type[i];
                                err = -EINVAL;
                        }
                }
        }
        if (err) {
                mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
                goto out;
        }

        mlx4_do_sense_ports(mdev, new_types, types);

        err = mlx4_check_port_params(mdev, new_types);
        if (err)
                goto out;

        /* We are about to apply the changes after the configuration
         * was verified; no need to remember the temporary types
         * any more */
        for (i = 0; i < mdev->caps.num_ports; i++)
                priv->port[i + 1].tmp_type = 0;

        err = mlx4_change_port_types(mdev, new_types);

out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
err_out:
        mutex_unlock(&set_port_type_mutex);

        return err ? err : count;
}

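/* MTU values as encoded by the IBTA PortInfo MTU fields (1..5 map to
 * 256..4096 bytes); the helpers below convert to/from byte counts and
 * return -1 for out-of-range values.
 */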
enum ibta_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
        switch (mtu) {
        case 256:  return IB_MTU_256;
        case 512:  return IB_MTU_512;
        case 1024: return IB_MTU_1024;
        case 2048: return IB_MTU_2048;
        case 4096: return IB_MTU_4096;
        default: return -1;
        }
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
        switch (mtu) {
        case IB_MTU_256:  return  256;
        case IB_MTU_512:  return  512;
        case IB_MTU_1024: return 1024;
        case IB_MTU_2048: return 2048;
        case IB_MTU_4096: return 4096;
        default: return -1;
        }
}

static ssize_t show_port_ib_mtu(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

        sprintf(buf, "%d\n",
                        ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
        return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
                               struct device_attribute *attr,
                               const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        int err, port, mtu, ibta_mtu = -1;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
                return -EINVAL;
        }

        err = kstrtoint(buf, 0, &mtu);
        if (!err)
                ibta_mtu = int_to_ibta_mtu(mtu);

        if (err || ibta_mtu < 0) {
                mlx4_err(mdev, "%s is an invalid IBTA mtu\n", buf);
                return -EINVAL;
        }

        mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        mlx4_unregister_device(mdev);
        for (port = 1; port <= mdev->caps.num_ports; port++) {
                mlx4_CLOSE_PORT(mdev, port);
                err = mlx4_SET_PORT(mdev, port, -1);
                if (err) {
                        mlx4_err(mdev, "Failed to set port %d, aborting\n",
                                 port);
                        goto err_set_port;
                }
        }
        err = mlx4_register_device(mdev);
err_set_port:
        mutex_unlock(&priv->port_mutex);
        mlx4_start_sense(mdev);
        return err ? err : count;
}

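/* Firmware start-up sequence: allocate ICM pages for the FW area, hand
 * them to the device with MAP_FA, then start the firmware with RUN_FW;
 * unwound in reverse order on failure.
 */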
static int mlx4_load_fw(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.fw_icm) {
                mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
        if (err) {
                mlx4_err(dev, "MAP_FA command failed, aborting\n");
                goto err_free;
        }

        err = mlx4_RUN_FW(dev);
        if (err) {
                mlx4_err(dev, "RUN_FW command failed, aborting\n");
                goto err_unmap_fa;
        }

        return 0;

err_unmap_fa:
        mlx4_UNMAP_FA(dev);

err_free:
        mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        return err;
}

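/* Maps the four cMPT regions (QP, SRQ, CQ and EQ), each located at
 * cmpt_base + ((type * cmpt_entry_sz) << MLX4_CMPT_SHIFT).
 */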
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                int cmpt_entry_sz)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
        int num_eqs;

        err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_QP *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err)
                goto err;

        err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err)
                goto err_qp;

        err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_CQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err)
                goto err_srq;

        num_eqs = dev->phys_caps.num_phys_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_EQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
        if (err)
                goto err_cq;

        return 0;

err_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
        return err;
}

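/* Sizes and maps the remaining ICM tables: SET_ICM_SIZE tells FW how much
 * context memory is needed and returns the number of aux pages to map via
 * MAP_ICM_AUX, after which each resource table (cMPT, EQ, MTT, dMPT, QP,
 * AUXC, ALTC, RDMARC, ...) is mapped; errors unwind in reverse order
 * through the goto chain.
 */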
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
                         struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 aux_pages;
        int num_eqs;
        int err;

        err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
        if (err) {
                mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
                return err;
        }

        mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
                 (unsigned long long) icm_size >> 10,
                 (unsigned long long) aux_pages << 2);

        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.aux_icm) {
                mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
        if (err) {
                mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
                goto err_free_aux;
        }

        err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
        if (err) {
                mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
                goto err_unmap_aux;
        }

        num_eqs = dev->phys_caps.num_phys_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.table,
                                  init_hca->eqc_base, dev_cap->eqc_entry_sz,
                                  num_eqs, num_eqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
                goto err_unmap_cmpt;
        }

        /*
         * Reserved MTT entries must be aligned up to a cacheline
         * boundary, since the FW will write to them, while the driver
         * writes to all other MTT entries. (The variable
         * dev->caps.mtt_entry_sz below is really the MTT segment
         * size, not the raw entry size)
         */
        dev->caps.reserved_mtts =
                ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
                      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
                                  init_hca->mtt_base,
                                  dev->caps.mtt_entry_sz,
                                  dev->caps.num_mtts,
                                  dev->caps.reserved_mtts, 1, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
                goto err_unmap_eq;
        }

        err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
                                  init_hca->dmpt_base,
                                  dev_cap->dmpt_entry_sz,
                                  dev->caps.num_mpts,
                                  dev->caps.reserved_mrws, 1, 1);
        if (err) {
                mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
                goto err_unmap_mtt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
                                  init_hca->qpc_base,
                                  dev_cap->qpc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map QP context memory, aborting\n");
                goto err_unmap_dmpt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
                                  init_hca->auxc_base,
                                  dev_cap->aux_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
                goto err_unmap_qp;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
                                  init_hca->altc_base,
                                  dev_cap->altc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
                goto err_unmap_auxc;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
                                  init_hca->rdmarc_base,
                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
                goto err_unmap_altc;
1373         }
1374
1375         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1376                                   init_hca->cqc_base,
1377                                   dev_cap->cqc_entry_sz,
1378                                   dev->caps.num_cqs,
1379                                   dev->caps.reserved_cqs, 0, 0);
1380         if (err) {
1381                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1382                 goto err_unmap_rdmarc;
1383         }
1384
1385         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1386                                   init_hca->srqc_base,
1387                                   dev_cap->srq_entry_sz,
1388                                   dev->caps.num_srqs,
1389                                   dev->caps.reserved_srqs, 0, 0);
1390         if (err) {
1391                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1392                 goto err_unmap_cq;
1393         }
1394
1395         /*
1396          * For flow steering device managed mode it is required to use
1397          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1398          * required, but for simplicity just map the whole multicast
1399          * group table now.  The table isn't very big and it's a lot
1400          * easier than trying to track ref counts.
1401          */
1402         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1403                                   init_hca->mc_base,
1404                                   mlx4_get_mgm_entry_size(dev),
1405                                   dev->caps.num_mgms + dev->caps.num_amgms,
1406                                   dev->caps.num_mgms + dev->caps.num_amgms,
1407                                   0, 0);
1408         if (err) {
1409                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1410                 goto err_unmap_srq;
1411         }
1412
1413         return 0;
1414
1415 err_unmap_srq:
1416         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1417
1418 err_unmap_cq:
1419         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1420
1421 err_unmap_rdmarc:
1422         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1423
1424 err_unmap_altc:
1425         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1426
1427 err_unmap_auxc:
1428         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1429
1430 err_unmap_qp:
1431         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1432
1433 err_unmap_dmpt:
1434         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1435
1436 err_unmap_mtt:
1437         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1438
1439 err_unmap_eq:
1440         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1441
1442 err_unmap_cmpt:
1443         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1444         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1445         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1446         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1447
1448 err_unmap_aux:
1449         mlx4_UNMAP_ICM_AUX(dev);
1450
1451 err_free_aux:
1452         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1453
1454         return err;
1455 }
1456
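/* Tear down all ICM tables in the reverse order of mlx4_init_icm(). */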
1457 static void mlx4_free_icms(struct mlx4_dev *dev)
1458 {
1459         struct mlx4_priv *priv = mlx4_priv(dev);
1460
1461         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1462         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1463         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1464         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1465         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1466         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1467         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1468         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1469         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1470         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1471         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1472         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1473         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1474         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1475
1476         mlx4_UNMAP_ICM_AUX(dev);
1477         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1478 }
1479
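/*
 * Ask the master, over the comm channel, to reset (close) this slave
 * function.  The slave command mutex serializes comm-channel commands.
 */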
1480 static void mlx4_slave_exit(struct mlx4_dev *dev)
1481 {
1482         struct mlx4_priv *priv = mlx4_priv(dev);
1483
1484         mutex_lock(&priv->cmd.slave_cmd_mutex);
1485         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
1486                 mlx4_warn(dev, "Failed to close slave function\n");
1487         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1488 }
1489
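/*
 * Map the BlueFlame send region as write-combining.  It lives in BAR 2,
 * directly above the num_uars UAR pages.
 */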
1490 static int map_bf_area(struct mlx4_dev *dev)
1491 {
1492         struct mlx4_priv *priv = mlx4_priv(dev);
1493         resource_size_t bf_start;
1494         resource_size_t bf_len;
1495         int err = 0;
1496
1497         if (!dev->caps.bf_reg_size)
1498                 return -ENXIO;
1499
1500         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1501                         (dev->caps.num_uars << PAGE_SHIFT);
1502         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1503                         (dev->caps.num_uars << PAGE_SHIFT);
1504         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1505         if (!priv->bf_mapping)
1506                 err = -ENOMEM;
1507
1508         return err;
1509 }
1510
1511 static void unmap_bf_area(struct mlx4_dev *dev)
1512 {
1513         if (mlx4_priv(dev)->bf_mapping)
1514                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1515 }
1516
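/*
 * Read the free-running 64-bit HCA cycle counter.  The high word is read
 * before and after the low word; if it changed in between, the low word
 * wrapped during the read, so the read is retried (up to 10 times) to
 * get a consistent snapshot.
 */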
1517 cycle_t mlx4_read_clock(struct mlx4_dev *dev)
1518 {
1519         u32 clockhi, clocklo, clockhi1;
1520         cycle_t cycles;
1521         int i;
1522         struct mlx4_priv *priv = mlx4_priv(dev);
1523
1524         for (i = 0; i < 10; i++) {
1525                 clockhi = swab32(readl(priv->clock_mapping));
1526                 clocklo = swab32(readl(priv->clock_mapping + 4));
1527                 clockhi1 = swab32(readl(priv->clock_mapping));
1528                 if (clockhi == clockhi1)
1529                         break;
1530         }
1531
1532         cycles = (u64) clockhi << 32 | (u64) clocklo;
1533
1534         return cycles;
1535 }
1536 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1537
1538
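/*
 * Map the internal clock register (the BAR and offset are reported by
 * the firmware via QUERY_FW) so that mlx4_read_clock() can sample it.
 */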
1539 static int map_internal_clock(struct mlx4_dev *dev)
1540 {
1541         struct mlx4_priv *priv = mlx4_priv(dev);
1542
1543         priv->clock_mapping =
1544                 ioremap(pci_resource_start(dev->persist->pdev,
1545                                            priv->fw.clock_bar) +
1546                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1547
1548         if (!priv->clock_mapping)
1549                 return -ENOMEM;
1550
1551         return 0;
1552 }
1553
1554 static void unmap_internal_clock(struct mlx4_dev *dev)
1555 {
1556         struct mlx4_priv *priv = mlx4_priv(dev);
1557
1558         if (priv->clock_mapping)
1559                 iounmap(priv->clock_mapping);
1560 }
1561
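/*
 * Undo mlx4_init_hca(): unmap the clock and BlueFlame areas, then either
 * notify the master (slave) or close the HCA and free its ICM (PF).
 */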
1562 static void mlx4_close_hca(struct mlx4_dev *dev)
1563 {
1564         unmap_internal_clock(dev);
1565         unmap_bf_area(dev);
1566         if (mlx4_is_slave(dev))
1567                 mlx4_slave_exit(dev);
1568         else {
1569                 mlx4_CLOSE_HCA(dev, 0);
1570                 mlx4_free_icms(dev);
1571         }
1572 }
1573
1574 static void mlx4_close_fw(struct mlx4_dev *dev)
1575 {
1576         if (!mlx4_is_slave(dev)) {
1577                 mlx4_UNMAP_FA(dev);
1578                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1579         }
1580 }
1581
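/*
 * Bring up a slave (VF) function: reset the comm channel, verify that
 * the slave's command interface revision matches the master's, then hand
 * the VHCR DMA address to the master 16 bits at a time (VHCR0..VHCR_EN).
 */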
1582 static int mlx4_init_slave(struct mlx4_dev *dev)
1583 {
1584         struct mlx4_priv *priv = mlx4_priv(dev);
1585         u64 dma = (u64) priv->mfunc.vhcr_dma;
1586         int ret_from_reset = 0;
1587         u32 slave_read;
1588         u32 cmd_channel_ver;
1589
1590         if (atomic_read(&pf_loading)) {
1591                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1592                 return -EPROBE_DEFER;
1593         }
1594
1595         mutex_lock(&priv->cmd.slave_cmd_mutex);
1596         priv->cmd.max_cmds = 1;
1597         mlx4_warn(dev, "Sending reset\n");
1598         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1599                                        MLX4_COMM_TIME);
1600         /* If we are in the middle of FLR, the slave will try
1601          * NUM_OF_RESET_RETRIES times before giving up. */
1602         if (ret_from_reset) {
1603                 if (ret_from_reset == MLX4_DELAY_RESET_SLAVE) {
1604                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1605                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1606                         return -EPROBE_DEFER;
1607                 } else
1608                         goto err;
1609         }
1610
1611         /* Check the driver version: the slave interface revision
1612          * must match the master's. */
1613         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1614         cmd_channel_ver = mlx4_comm_get_version();
1615
1616         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1617                 MLX4_COMM_GET_IF_REV(slave_read)) {
1618                 mlx4_err(dev, "slave driver version is not supported by the master\n");
1619                 goto err;
1620         }
1621
1622         mlx4_warn(dev, "Sending vhcr0\n");
1623         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1624                                                     MLX4_COMM_TIME))
1625                 goto err;
1626         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1627                                                     MLX4_COMM_TIME))
1628                 goto err;
1629         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1630                                                     MLX4_COMM_TIME))
1631                 goto err;
1632         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
1633                 goto err;
1634
1635         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1636         return 0;
1637
1638 err:
1639         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
1640         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1641         return -EIO;
1642 }
1643
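/*
 * On the master, shrink the PF's own view of the per-port GID and P_Key
 * tables: ETH ports get the GID range of function 0, and one P_Key slot
 * is held back from the physical table.
 */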
1644 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1645 {
1646         int i;
1647
1648         for (i = 1; i <= dev->caps.num_ports; i++) {
1649                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1650                         dev->caps.gid_table_len[i] =
1651                                 mlx4_get_slave_num_gids(dev, 0, i);
1652                 else
1653                         dev->caps.gid_table_len[i] = 1;
1654                 dev->caps.pkey_table_len[i] =
1655                         dev->phys_caps.pkey_phys_table_len[i] - 1;
1656         }
1657 }
1658
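/*
 * Pick the smallest log2 MGM entry size whose entry can hold at least
 * qp_per_entry QPs.  An entry of 2^i bytes consists of (2^i)/16 lines of
 * 16 bytes; two lines hold the entry header and GID, and each remaining
 * line holds 4 QPNs, so e.g. i = 9 (512-byte entries) fits
 * 4 * (512/16 - 2) = 120 QPs.  Returns -1 if even the largest size is
 * too small.
 */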
1659 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1660 {
1661         int i;
1662
1663         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1664               i++) {
1665                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1666                         break;
1667         }
1668
1669         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1670 }
1671
1672 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
1673 {
1674         switch (dmfs_high_steer_mode) {
1675         case MLX4_STEERING_DMFS_A0_DEFAULT:
1676                 return "default performance";
1677
1678         case MLX4_STEERING_DMFS_A0_DYNAMIC:
1679                 return "dynamic hybrid mode";
1680
1681         case MLX4_STEERING_DMFS_A0_STATIC:
1682                 return "performance optimized for limited rule configuration (static)";
1683
1684         case MLX4_STEERING_DMFS_A0_DISABLE:
1685                 return "disabled performance optimized steering";
1686
1687         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
1688                 return "performance optimized steering not supported";
1689
1690         default:
1691                 return "Unrecognized mode";
1692         }
1693 }
1694
1695 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
1696
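/*
 * Select the steering mode: device-managed flow steering (DMFS) when the
 * FW supports it, the module parameter allows it (log_num_mgm_entry_size
 * <= 0) and the MGM entries can hold a QP per function; otherwise fall
 * back to B0 (which needs both UC and MC VEP steering caps) or A0.
 */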
1697 static void choose_steering_mode(struct mlx4_dev *dev,
1698                                  struct mlx4_dev_cap *dev_cap)
1699 {
1700         if (mlx4_log_num_mgm_entry_size <= 0) {
1701                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
1702                         if (dev->caps.dmfs_high_steer_mode ==
1703                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1704                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
1705                         else
1706                                 dev->caps.dmfs_high_steer_mode =
1707                                         MLX4_STEERING_DMFS_A0_STATIC;
1708                 }
1709         }
1710
1711         if (mlx4_log_num_mgm_entry_size <= 0 &&
1712             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1713             (!mlx4_is_mfunc(dev) ||
1714              (dev_cap->fs_max_num_qp_per_entry >=
1715              (dev->persist->num_vfs + 1))) &&
1716             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1717                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1718                 dev->oper_log_mgm_entry_size =
1719                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1720                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1721                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1722                 dev->caps.fs_log_max_ucast_qp_range_size =
1723                         dev_cap->fs_log_max_ucast_qp_range_size;
1724         } else {
1725                 if (dev->caps.dmfs_high_steer_mode !=
1726                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1727                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
1728                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1729                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1730                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1731                 else {
1732                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1733
1734                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1735                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1736                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
1737                 }
1738                 dev->oper_log_mgm_entry_size =
1739                         mlx4_log_num_mgm_entry_size > 0 ?
1740                         mlx4_log_num_mgm_entry_size :
1741                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1742                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1743         }
1744         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
1745                  mlx4_steering_mode_str(dev->caps.steering_mode),
1746                  dev->oper_log_mgm_entry_size,
1747                  mlx4_log_num_mgm_entry_size);
1748 }
1749
1750 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
1751                                        struct mlx4_dev_cap *dev_cap)
1752 {
1753         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
1754             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS &&
1755             dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC)
1756                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
1757         else
1758                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
1759
1760         mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
1761                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
1762 }
1763
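/*
 * Cross-check the DMFS high rate steering mode the driver requested
 * against the per-port dmfs_optimized state reported by the FW, and log
 * any mismatch.
 */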
1764 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
1765 {
1766         int i;
1767         struct mlx4_port_cap port_cap;
1768
1769         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1770                 return -EINVAL;
1771
1772         for (i = 1; i <= dev->caps.num_ports; i++) {
1773                 if (mlx4_dev_port(dev, i, &port_cap)) {
1774                         mlx4_err(dev,
1775                                  "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering\n");
1776                 } else if ((dev->caps.dmfs_high_steer_mode !=
1777                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
1778                            (port_cap.dmfs_optimized_state ==
1779                             !!(dev->caps.dmfs_high_steer_mode ==
1780                             MLX4_STEERING_DMFS_A0_DISABLE))) {
1781                         mlx4_err(dev,
1782                                  "DMFS high rate steer modes differ: driver requested %s but it is %s in FW\n",
1783                                  dmfs_high_rate_steering_mode_str(
1784                                         dev->caps.dmfs_high_steer_mode),
1785                                  (port_cap.dmfs_optimized_state ?
1786                                         "enabled" : "disabled"));
1787                 }
1788         }
1789
1790         return 0;
1791 }
1792
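/*
 * PF-only FW bring-up: QUERY_FW, then map the FW area and start the FW
 * (mlx4_load_fw), then override the FW log page size via MOD_STAT_CFG.
 * Slaves skip this; only the master talks to the FW directly.
 */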
1793 static int mlx4_init_fw(struct mlx4_dev *dev)
1794 {
1795         struct mlx4_mod_stat_cfg   mlx4_cfg;
1796         int err = 0;
1797
1798         if (!mlx4_is_slave(dev)) {
1799                 err = mlx4_QUERY_FW(dev);
1800                 if (err) {
1801                         if (err == -EACCES)
1802                                 mlx4_info(dev, "non-primary physical function, skipping\n");
1803                         else
1804                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
1805                         return err;
1806                 }
1807
1808                 err = mlx4_load_fw(dev);
1809                 if (err) {
1810                         mlx4_err(dev, "Failed to start FW, aborting\n");
1811                         return err;
1812                 }
1813
1814                 mlx4_cfg.log_pg_sz_m = 1;
1815                 mlx4_cfg.log_pg_sz = 0;
1816                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
1817                 if (err)
1818                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
1819         }
1820
1821         return err;
1822 }
1823
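/*
 * Full HCA initialization.  On the PF: query device caps, choose the
 * steering and tunnel offload modes, build a resource profile, map ICM
 * and issue INIT_HCA, then optionally map the internal clock.  On a VF:
 * perform the slave handshake and fetch the caps from the master.  Both
 * paths then map the BlueFlame area and query the adapter.
 */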
1824 static int mlx4_init_hca(struct mlx4_dev *dev)
1825 {
1826         struct mlx4_priv          *priv = mlx4_priv(dev);
1827         struct mlx4_adapter        adapter;
1828         struct mlx4_dev_cap        dev_cap;
1829         struct mlx4_profile        profile;
1830         struct mlx4_init_hca_param init_hca;
1831         u64 icm_size;
1832         struct mlx4_config_dev_params params;
1833         int err;
1834
1835         if (!mlx4_is_slave(dev)) {
1836                 err = mlx4_dev_cap(dev, &dev_cap);
1837                 if (err) {
1838                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
1839                         return err;
1840                 }
1841
1842                 choose_steering_mode(dev, &dev_cap);
1843                 choose_tunnel_offload_mode(dev, &dev_cap);
1844
1845                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
1846                     mlx4_is_master(dev))
1847                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
1848
1849                 err = mlx4_get_phys_port_id(dev);
1850                 if (err)
1851                         mlx4_err(dev, "Failed to get physical port id\n");
1852
1853                 if (mlx4_is_master(dev))
1854                         mlx4_parav_master_pf_caps(dev);
1855
1856                 if (mlx4_low_memory_profile()) {
1857                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
1858                         profile = low_mem_profile;
1859                 } else {
1860                         profile = default_profile;
1861                 }
1862                 if (dev->caps.steering_mode ==
1863                     MLX4_STEERING_MODE_DEVICE_MANAGED)
1864                         profile.num_mcg = MLX4_FS_NUM_MCG;
1865
1866                 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
1867                                              &init_hca);
1868                 if ((long long) icm_size < 0) {
1869                         err = icm_size;
1870                         return err;
1871                 }
1872
1873                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
1874
1875                 init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
1876                 init_hca.uar_page_sz = PAGE_SHIFT - 12;
1877                 init_hca.mw_enabled = 0;
1878                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
1879                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
1880                         init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
1881
1882                 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
1883                 if (err)
1884                         return err;
1885
1886                 err = mlx4_INIT_HCA(dev, &init_hca);
1887                 if (err) {
1888                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
1889                         goto err_free_icm;
1890                 }
1891
1892                 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
1893                         err = mlx4_query_func(dev, &dev_cap);
1894                         if (err < 0) {
1895                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
1896                                 goto err_close;
1897                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
1898                                 dev->caps.num_eqs = dev_cap.max_eqs;
1899                                 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
1900                                 dev->caps.reserved_uars = dev_cap.reserved_uars;
1901                         }
1902                 }
1903
1904                 /*
1905                  * If timestamping is supported by the FW, read the
1906                  * HCA core clock frequency with the QUERY_HCA command.
1907                  */
1908                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
1909                         memset(&init_hca, 0, sizeof(init_hca));
1910                         err = mlx4_QUERY_HCA(dev, &init_hca);
1911                         if (err) {
1912                                 mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
1913                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1914                         } else {
1915                                 dev->caps.hca_core_clock =
1916                                         init_hca.hca_core_clock;
1917                         }
1918
1919                         /* If the reported HCA frequency is 0, disable
1920                          * timestamping to avoid dividing by zero.
1921                          */
1922                         if (!dev->caps.hca_core_clock) {
1923                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1924                                 mlx4_err(dev,
1925                                          "HCA frequency is 0 - timestamping is not supported\n");
1926                         } else if (map_internal_clock(dev)) {
1927                                 /*
1928                                  * Mapping the internal clock failed;
1929                                  * disable timestamping.
1930                                  */
1931                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1932                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
1933                         }
1934                 }
1935
1936                 if (dev->caps.dmfs_high_steer_mode !=
1937                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
1938                         if (mlx4_validate_optimized_steering(dev))
1939                                 mlx4_warn(dev, "Optimized steering validation failed\n");
1940
1941                         if (dev->caps.dmfs_high_steer_mode ==
1942                             MLX4_STEERING_DMFS_A0_DISABLE) {
1943                                 dev->caps.dmfs_high_rate_qpn_base =
1944                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
1945                                 dev->caps.dmfs_high_rate_qpn_range =
1946                                         MLX4_A0_STEERING_TABLE_SIZE;
1947                         }
1948
1949                         mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
1950                                  dmfs_high_rate_steering_mode_str(
1951                                         dev->caps.dmfs_high_steer_mode));
1952                 }
1953         } else {
1954                 err = mlx4_init_slave(dev);
1955                 if (err) {
1956                         if (err != -EPROBE_DEFER)
1957                                 mlx4_err(dev, "Failed to initialize slave\n");
1958                         return err;
1959                 }
1960
1961                 err = mlx4_slave_cap(dev);
1962                 if (err) {
1963                         mlx4_err(dev, "Failed to obtain slave caps\n");
1964                         goto err_close;
1965                 }
1966         }
1967
1968         if (map_bf_area(dev))
1969                 mlx4_dbg(dev, "Failed to map blue flame area\n");
1970
1971         /* Only the master sets the ports; the rest get the setting from it. */
1972         if (!mlx4_is_slave(dev))
1973                 mlx4_set_port_mask(dev);
1974
1975         err = mlx4_QUERY_ADAPTER(dev, &adapter);
1976         if (err) {
1977                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
1978                 goto unmap_bf;
1979         }
1980
1981         /* Query CONFIG_DEV parameters */
1982         err = mlx4_config_dev_retrieval(dev, &params);
1983         if (err && err != -ENOTSUPP) {
1984                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
1985         } else if (!err) {
1986                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
1987                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
1988         }
1989         priv->eq_table.inta_pin = adapter.inta_pin;
1990         memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
1991
1992         return 0;
1993
1994 unmap_bf:
1995         unmap_internal_clock(dev);
1996         unmap_bf_area(dev);
1997
1998         if (mlx4_is_slave(dev)) {
1999                 kfree(dev->caps.qp0_qkey);
2000                 kfree(dev->caps.qp0_tunnel);
2001                 kfree(dev->caps.qp0_proxy);
2002                 kfree(dev->caps.qp1_tunnel);
2003                 kfree(dev->caps.qp1_proxy);
2004         }
2005
2006 err_close:
2007         if (mlx4_is_slave(dev))
2008                 mlx4_slave_exit(dev);
2009         else
2010                 mlx4_CLOSE_HCA(dev, 0);
2011
2012 err_free_icm:
2013         if (!mlx4_is_slave(dev))
2014                 mlx4_free_icms(dev);
2015
2016         return err;
2017 }
2018
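/* The counter bitmap hands out HW counter indices up to max_counters. */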
2019 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2020 {
2021         struct mlx4_priv *priv = mlx4_priv(dev);
2022         int nent;
2023
2024         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2025                 return -ENOENT;
2026
2027         nent = dev->caps.max_counters;
2028         return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
2029 }
2030
2031 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2032 {
2033         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2034 }
2035
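/*
 * The __mlx4_counter_* helpers touch the counter bitmap directly; the
 * non-__ wrappers below route through the ALLOC_RES/FREE_RES commands on
 * multi-function devices so that the PF mediates the allocation.
 */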
2036 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2037 {
2038         struct mlx4_priv *priv = mlx4_priv(dev);
2039
2040         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2041                 return -ENOENT;
2042
2043         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2044         if (*idx == -1)
2045                 return -ENOMEM;
2046
2047         return 0;
2048 }
2049
2050 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2051 {
2052         u64 out_param;
2053         int err;
2054
2055         if (mlx4_is_mfunc(dev)) {
2056                 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2057                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2058                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2059                 if (!err)
2060                         *idx = get_param_l(&out_param);
2061
2062                 return err;
2063         }
2064         return __mlx4_counter_alloc(dev, idx);
2065 }
2066 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2067
2068 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2069 {
2070         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2071         return;
2072 }
2073
2074 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2075 {
2076         u64 in_param = 0;
2077
2078         if (mlx4_is_mfunc(dev)) {
2079                 set_param_l(&in_param, idx);
2080                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2081                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2082                          MLX4_CMD_WRAPPED);
2083                 return;
2084         }
2085         __mlx4_counter_free(dev, idx);
2086 }
2087 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2088
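/*
 * Set up all SW resource tables and the event infrastructure: UAR, PD,
 * XRCD and MR tables, the EQ table, switching FW commands from polling
 * to event-driven mode (verified with a NOP interrupt test), then the
 * CQ/SRQ/QP/counter tables and per-port IB capabilities and MTU.
 */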
2089 static int mlx4_setup_hca(struct mlx4_dev *dev)
2090 {
2091         struct mlx4_priv *priv = mlx4_priv(dev);
2092         int err;
2093         int port;
2094         __be32 ib_port_default_caps;
2095
2096         err = mlx4_init_uar_table(dev);
2097         if (err) {
2098                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2099                 return err;
2100         }
2101
2102         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2103         if (err) {
2104                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2105                 goto err_uar_table_free;
2106         }
2107
2108         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2109         if (!priv->kar) {
2110                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2111                 err = -ENOMEM;
2112                 goto err_uar_free;
2113         }
2114
2115         err = mlx4_init_pd_table(dev);
2116         if (err) {
2117                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2118                 goto err_kar_unmap;
2119         }
2120
2121         err = mlx4_init_xrcd_table(dev);
2122         if (err) {
2123                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2124                 goto err_pd_table_free;
2125         }
2126
2127         err = mlx4_init_mr_table(dev);
2128         if (err) {
2129                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2130                 goto err_xrcd_table_free;
2131         }
2132
2133         if (!mlx4_is_slave(dev)) {
2134                 err = mlx4_init_mcg_table(dev);
2135                 if (err) {
2136                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2137                         goto err_mr_table_free;
2138                 }
2139                 err = mlx4_config_mad_demux(dev);
2140                 if (err) {
2141                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2142                         goto err_mcg_table_free;
2143                 }
2144         }
2145
2146         err = mlx4_init_eq_table(dev);
2147         if (err) {
2148                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2149                 goto err_mcg_table_free;
2150         }
2151
2152         err = mlx4_cmd_use_events(dev);
2153         if (err) {
2154                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2155                 goto err_eq_table_free;
2156         }
2157
2158         err = mlx4_NOP(dev);
2159         if (err) {
2160                 if (dev->flags & MLX4_FLAG_MSI_X) {
2161                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2162                                   priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2163                         mlx4_warn(dev, "Trying again without MSI-X\n");
2164                 } else {
2165                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2166                                  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2167                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2168                 }
2169
2170                 goto err_cmd_poll;
2171         }
2172
2173         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2174
2175         err = mlx4_init_cq_table(dev);
2176         if (err) {
2177                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2178                 goto err_cmd_poll;
2179         }
2180
2181         err = mlx4_init_srq_table(dev);
2182         if (err) {
2183                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2184                 goto err_cq_table_free;
2185         }
2186
2187         err = mlx4_init_qp_table(dev);
2188         if (err) {
2189                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2190                 goto err_srq_table_free;
2191         }
2192
2193         err = mlx4_init_counters_table(dev);
2194         if (err && err != -ENOENT) {
2195                 mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2196                 goto err_qp_table_free;
2197         }
2198
2199         if (!mlx4_is_slave(dev)) {
2200                 for (port = 1; port <= dev->caps.num_ports; port++) {
2201                         ib_port_default_caps = 0;
2202                         err = mlx4_get_port_ib_caps(dev, port,
2203                                                     &ib_port_default_caps);
2204                         if (err)
2205                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2206                                           port, err);
2207                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2208
2209                         /* initialize per-slave default ib port capabilities */
2210                         if (mlx4_is_master(dev)) {
2211                                 int i;
2212                                 for (i = 0; i < dev->num_slaves; i++) {
2213                                         if (i == mlx4_master_func_num(dev))
2214                                                 continue;
2215                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2216                                                 ib_port_default_caps;
2217                                 }
2218                         }
2219
2220                         if (mlx4_is_mfunc(dev))
2221                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2222                         else
2223                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2224
2225                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2226                                             dev->caps.pkey_table_len[port] : -1);
2227                         if (err) {
2228                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2229                                          port);
2230                                 goto err_counters_table_free;
2231                         }
2232                 }
2233         }
2234
2235         return 0;
2236
2237 err_counters_table_free:
2238         mlx4_cleanup_counters_table(dev);
2239
2240 err_qp_table_free:
2241         mlx4_cleanup_qp_table(dev);
2242
2243 err_srq_table_free:
2244         mlx4_cleanup_srq_table(dev);
2245
2246 err_cq_table_free:
2247         mlx4_cleanup_cq_table(dev);
2248
2249 err_cmd_poll:
2250         mlx4_cmd_use_polling(dev);
2251
2252 err_eq_table_free:
2253         mlx4_cleanup_eq_table(dev);
2254
2255 err_mcg_table_free:
2256         if (!mlx4_is_slave(dev))
2257                 mlx4_cleanup_mcg_table(dev);
2258
2259 err_mr_table_free:
2260         mlx4_cleanup_mr_table(dev);
2261
2262 err_xrcd_table_free:
2263         mlx4_cleanup_xrcd_table(dev);
2264
2265 err_pd_table_free:
2266         mlx4_cleanup_pd_table(dev);
2267
2268 err_kar_unmap:
2269         iounmap(priv->kar);
2270
2271 err_uar_free:
2272         mlx4_uar_free(dev, &priv->driver_uar);
2273
2274 err_uar_table_free:
2275         mlx4_cleanup_uar_table(dev);
2276         return err;
2277 }
2278
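/*
 * Try to enable MSI-X with one completion vector per port per online CPU
 * plus the legacy set; shrink the request to what the HW and
 * pci_enable_msix_range() allow, or fall back to the single INTx IRQ.
 */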
2279 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2280 {
2281         struct mlx4_priv *priv = mlx4_priv(dev);
2282         struct msix_entry *entries;
2283         int i;
2284
2285         if (msi_x) {
2286                 int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
2287
2288                 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2289                              nreq);
2290
2291                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2292                 if (!entries)
2293                         goto no_msi;
2294
2295                 for (i = 0; i < nreq; ++i)
2296                         entries[i].entry = i;
2297
2298                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2299                                              nreq);
2300
2301                 if (nreq < 0) {
2302                         kfree(entries);
2303                         goto no_msi;
2304                 } else if (nreq < MSIX_LEGACY_SZ +
2305                            dev->caps.num_ports * MIN_MSIX_P_PORT) {
2306                         /* Working in legacy mode, all EQs are shared */
2307                         dev->caps.comp_pool           = 0;
2308                         dev->caps.num_comp_vectors = nreq - 1;
2309                 } else {
2310                         dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
2311                         dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
2312                 }
2313                 for (i = 0; i < nreq; ++i)
2314                         priv->eq_table.eq[i].irq = entries[i].vector;
2315
2316                 dev->flags |= MLX4_FLAG_MSI_X;
2317
2318                 kfree(entries);
2319                 return;
2320         }
2321
2322 no_msi:
2323         dev->caps.num_comp_vectors = 1;
2324         dev->caps.comp_pool        = 0;
2325
2326         for (i = 0; i < 2; ++i)
2327                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2328 }
2329
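/*
 * Per-port init: MAC/VLAN/RoCE GID tables (PF only) and two sysfs
 * attributes, mlx4_port<N> (port type) and mlx4_port<N>_mtu (IB MTU),
 * writable only when the device is not multi-function.
 */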
2330 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2331 {
2332         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2333         int err = 0;
2334
2335         info->dev = dev;
2336         info->port = port;
2337         if (!mlx4_is_slave(dev)) {
2338                 mlx4_init_mac_table(dev, &info->mac_table);
2339                 mlx4_init_vlan_table(dev, &info->vlan_table);
2340                 mlx4_init_roce_gid_table(dev, &info->gid_table);
2341                 info->base_qpn = mlx4_get_base_qpn(dev, port);
2342         }
2343
2344         sprintf(info->dev_name, "mlx4_port%d", port);
2345         info->port_attr.attr.name = info->dev_name;
2346         if (mlx4_is_mfunc(dev))
2347                 info->port_attr.attr.mode = S_IRUGO;
2348         else {
2349                 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2350                 info->port_attr.store     = set_port_type;
2351         }
2352         info->port_attr.show      = show_port_type;
2353         sysfs_attr_init(&info->port_attr.attr);
2354
2355         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2356         if (err) {
2357                 mlx4_err(dev, "Failed to create file for port %d\n", port);
2358                 info->port = -1;
2359         }
2360
2361         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2362         info->port_mtu_attr.attr.name = info->dev_mtu_name;
2363         if (mlx4_is_mfunc(dev))
2364                 info->port_mtu_attr.attr.mode = S_IRUGO;
2365         else {
2366                 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2367                 info->port_mtu_attr.store     = set_port_ib_mtu;
2368         }
2369         info->port_mtu_attr.show      = show_port_ib_mtu;
2370         sysfs_attr_init(&info->port_mtu_attr.attr);
2371
2372         err = device_create_file(&dev->persist->pdev->dev,
2373                                  &info->port_mtu_attr);
2374         if (err) {
2375                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2376                 device_remove_file(&info->dev->persist->pdev->dev,
2377                                    &info->port_attr);
2378                 info->port = -1;
2379         }
2380
2381         return err;
2382 }
2383
2384 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2385 {
2386         if (info->port < 0)
2387                 return;
2388
2389         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2390         device_remove_file(&info->dev->persist->pdev->dev,
2391                            &info->port_mtu_attr);
2392 }
2393
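/*
 * Allocate the per-port steering state: a list of promiscuous QPs and a
 * list of steering entries for each steer type.
 */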
2394 static int mlx4_init_steering(struct mlx4_dev *dev)
2395 {
2396         struct mlx4_priv *priv = mlx4_priv(dev);
2397         int num_entries = dev->caps.num_ports;
2398         int i, j;
2399
2400         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
2401         if (!priv->steer)
2402                 return -ENOMEM;
2403
2404         for (i = 0; i < num_entries; i++)
2405                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2406                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
2407                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
2408                 }
2409         return 0;
2410 }
2411
2412 static void mlx4_clear_steering(struct mlx4_dev *dev)
2413 {
2414         struct mlx4_priv *priv = mlx4_priv(dev);
2415         struct mlx4_steer_index *entry, *tmp_entry;
2416         struct mlx4_promisc_qp *pqp, *tmp_pqp;
2417         int num_entries = dev->caps.num_ports;
2418         int i, j;
2419
2420         for (i = 0; i < num_entries; i++) {
2421                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2422                         list_for_each_entry_safe(pqp, tmp_pqp,
2423                                                  &priv->steer[i].promisc_qps[j],
2424                                                  list) {
2425                                 list_del(&pqp->list);
2426                                 kfree(pqp);
2427                         }
2428                         list_for_each_entry_safe(entry, tmp_entry,
2429                                                  &priv->steer[i].steer_entries[j],
2430                                                  list) {
2431                                 list_del(&entry->list);
2432                                 list_for_each_entry_safe(pqp, tmp_pqp,
2433                                                          &entry->duplicates,
2434                                                          list) {
2435                                         list_del(&pqp->list);
2436                                         kfree(pqp);
2437                                 }
2438                                 kfree(entry);
2439                         }
2440                 }
2441         }
2442         kfree(priv->steer);
2443 }
2444
2445 static int extended_func_num(struct pci_dev *pdev)
2446 {
2447         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
2448 }
2449
2450 #define MLX4_OWNER_BASE 0x8069c
2451 #define MLX4_OWNER_SIZE 4
2452
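/*
 * The ownership semaphore is a register at a fixed offset in BAR 0; a
 * non-zero value means another function already owns the device.
 */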
2453 static int mlx4_get_ownership(struct mlx4_dev *dev)
2454 {
2455         void __iomem *owner;
2456         u32 ret;
2457
2458         if (pci_channel_offline(dev->persist->pdev))
2459                 return -EIO;
2460
2461         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2462                         MLX4_OWNER_BASE,
2463                         MLX4_OWNER_SIZE);
2464         if (!owner) {
2465                 mlx4_err(dev, "Failed to obtain ownership bit\n");
2466                 return -ENOMEM;
2467         }
2468
2469         ret = readl(owner);
2470         iounmap(owner);
2471         return (int) !!ret;
2472 }
2473
2474 static void mlx4_free_ownership(struct mlx4_dev *dev)
2475 {
2476         void __iomem *owner;
2477
2478         if (pci_channel_offline(dev->persist->pdev))
2479                 return;
2480
2481         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2482                         MLX4_OWNER_BASE,
2483                         MLX4_OWNER_SIZE);
2484         if (!owner) {
2485                 mlx4_err(dev, "Failed to obtain ownership bit\n");
2486                 return;
2487         }
2488         writel(0, owner);
2489         msleep(1000);
2490         iounmap(owner);
2491 }
2492
2493 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
2494                                   !!((flags) & MLX4_FLAG_MASTER))
2495
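/*
 * Enable SR-IOV (unless it is already on from a previous driver
 * instance) and return the updated device flags.  pf_loading stays
 * elevated while the PF initializes so that probing VFs see it and
 * return -EPROBE_DEFER.
 */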
2496 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
2497                              u8 total_vfs, int existing_vfs)
2498 {
2499         u64 dev_flags = dev->flags;
2500         int err = 0;
2501
2502         atomic_inc(&pf_loading);
2503         if (dev->flags & MLX4_FLAG_SRIOV) {
2504                 if (existing_vfs != total_vfs) {
2505                         mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
2506                                  existing_vfs, total_vfs);
2507                         total_vfs = existing_vfs;
2508                 }
2509         }
2510
2511         dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
2512         if (!dev->dev_vfs) {
2513                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
2514                 goto disable_sriov;
2515         }
2516
2517         if (!(dev->flags & MLX4_FLAG_SRIOV)) {
2518                 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
2519                 err = pci_enable_sriov(pdev, total_vfs);
2520         }
2521         if (err) {
2522                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
2523                          err);
2524                 goto disable_sriov;
2525         } else {
2526                 mlx4_warn(dev, "Running in master mode\n");
2527                 dev_flags |= MLX4_FLAG_SRIOV |
2528                         MLX4_FLAG_MASTER;
2529                 dev_flags &= ~MLX4_FLAG_SLAVE;
2530                 dev->persist->num_vfs = total_vfs;
2531         }
2532         return dev_flags;
2533
2534 disable_sriov:
2535         atomic_dec(&pf_loading);
2536         dev->persist->num_vfs = 0;
2537         kfree(dev->dev_vfs);
2538         return dev_flags & ~MLX4_FLAG_MASTER;
2539 }
2540
2541 enum {
2542         MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
2543 };
2544
2545 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
2546                               int *nvfs)
2547 {
2548         int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
2549         /* Checking for 64 VFs as a limitation of CX2 */
2550         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
2551             requested_vfs >= 64) {
2552                 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
2553                          requested_vfs);
2554                 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
2555         }
2556         return 0;
2557 }
2558
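/*
 * The main probe worker.  PFs claim device ownership, reset the HCA and
 * (optionally) enable SR-IOV; both PFs and VFs then init the command
 * interface, FW and HCA.  The slave_start label is re-entered when the
 * function discovers mid-init that it must run as a slave, or after
 * SR-IOV has been enabled for legacy (non-SYS_EQS) FW.
 */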
2559 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2560                          int total_vfs, int *nvfs, struct mlx4_priv *priv)
2561 {
2562         struct mlx4_dev *dev;
2563         unsigned sum = 0;
2564         int err;
2565         int port;
2566         int i;
2567         struct mlx4_dev_cap *dev_cap = NULL;
2568         int existing_vfs = 0;
2569
2570         dev = &priv->dev;
2571
2572         INIT_LIST_HEAD(&priv->ctx_list);
2573         spin_lock_init(&priv->ctx_lock);
2574
2575         mutex_init(&priv->port_mutex);
2576
2577         INIT_LIST_HEAD(&priv->pgdir_list);
2578         mutex_init(&priv->pgdir_mutex);
2579
2580         INIT_LIST_HEAD(&priv->bf_list);
2581         mutex_init(&priv->bf_mutex);
2582
2583         dev->rev_id = pdev->revision;
2584         dev->numa_node = dev_to_node(&pdev->dev);
2585
2586         /* Detect if this device is a virtual function */
2587         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
2588                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
2589                 dev->flags |= MLX4_FLAG_SLAVE;
2590         } else {
2591                 /* We reset the device and enable SRIOV only for physical
2592                  * devices.  Try to claim ownership on the device;
2593                  * if already taken, skip -- do not allow multiple PFs */
2594                 err = mlx4_get_ownership(dev);
2595                 if (err) {
2596                         if (err < 0)
2597                                 return err;
2598                         else {
2599                                 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
2600                                 return -EINVAL;
2601                         }
2602                 }
2603
2604                 atomic_set(&priv->opreq_count, 0);
2605                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
2606
2607                 /*
2608                  * Now reset the HCA before we touch the PCI capabilities or
2609                  * attempt a firmware command, since a boot ROM may have left
2610                  * the HCA in an undefined state.
2611                  */
2612                 err = mlx4_reset(dev);
2613                 if (err) {
2614                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
2615                         goto err_sriov;
2616                 }
2617
2618                 if (total_vfs) {
2619                         dev->flags = MLX4_FLAG_MASTER;
2620                         existing_vfs = pci_num_vf(pdev);
2621                         if (existing_vfs)
2622                                 dev->flags |= MLX4_FLAG_SRIOV;
2623                         dev->persist->num_vfs = total_vfs;
2624                 }
2625         }
2626
2627         /* On load, clear any previous indication of internal error;
2628          * the device is up.
2629          */
2630         dev->persist->state = MLX4_DEVICE_STATE_UP;
2631
2632 slave_start:
2633         err = mlx4_cmd_init(dev);
2634         if (err) {
2635                 mlx4_err(dev, "Failed to init command interface, aborting\n");
2636                 goto err_sriov;
2637         }
2638
2639         /* In slave functions, the communication channel must be initialized
2640          * before posting commands. Also, init num_slaves before calling
2641          * mlx4_init_hca */
2642         if (mlx4_is_mfunc(dev)) {
2643                 if (mlx4_is_master(dev)) {
2644                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
2645
2646                 } else {
2647                         dev->num_slaves = 0;
2648                         err = mlx4_multi_func_init(dev);
2649                         if (err) {
2650                                 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
2651                                 goto err_cmd;
2652                         }
2653                 }
2654         }
2655
2656         err = mlx4_init_fw(dev);
2657         if (err) {
2658                 mlx4_err(dev, "Failed to init fw, aborting.\n");
2659                 goto err_mfunc;
2660         }
2661
2662         if (mlx4_is_master(dev)) {
2663                 /* when we hit the goto slave_start below, dev_cap already initialized */
2664                 if (!dev_cap) {
2665                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
2666
2667                         if (!dev_cap) {
2668                                 err = -ENOMEM;
2669                                 goto err_fw;
2670                         }
2671
2672                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2673                         if (err) {
2674                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2675                                 goto err_fw;
2676                         }
2677
2678                         err = mlx4_check_dev_cap(dev, dev_cap, nvfs);
2679                         if (err)
2680                                 goto err_fw;
2680
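                             /* Legacy FW (no SYS_EQS support) needs SR-IOV
                              * enabled up front: enable it now, reset the HCA
                              * and restart from slave_start so the caps are
                              * re-queried with SR-IOV active.
                              */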
2681                         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2682                                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
2683                                                                   existing_vfs);
2684
2685                                 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2686                                 dev->flags = dev_flags;
2687                                 if (!SRIOV_VALID_STATE(dev->flags)) {
2688                                         mlx4_err(dev, "Invalid SRIOV state\n");
2689                                         err = -EINVAL;
2690                                         goto err_sriov;
2691                                 }
2691                                 err = mlx4_reset(dev);
2692                                 if (err) {
2693                                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
2694                                         goto err_sriov;
2695                                 }
2696                                 goto slave_start;
2697                         }
2698                 } else {
2699                         /* Legacy mode FW requires SR-IOV to be enabled before
2700                          * doing QUERY_DEV_CAP, since the reported value of
2701                          * max_eq differs when SR-IOV is enabled.
2702                          */
2703                         memset(dev_cap, 0, sizeof(*dev_cap));
2704                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2705                         if (err) {
2706                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2707                                 goto err_fw;
2708                         }
2709
2710                         err = mlx4_check_dev_cap(dev, dev_cap, nvfs);
2711                         if (err)
2712                                 goto err_fw;
2712                 }
2713         }
2714
2715         err = mlx4_init_hca(dev);
2716         if (err) {
2717                 if (err == -EACCES) {
2718                         /* Not the primary physical function --
2719                          * fall back to running in slave mode */
2720                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2721                         /* We're not a PF: undo any SR-IOV enablement */
2722                         if (dev->flags & MLX4_FLAG_SRIOV) {
2723                                 if (!existing_vfs)
2724                                         pci_disable_sriov(pdev);
2725                                 if (mlx4_is_master(dev))
2726                                         atomic_dec(&pf_loading);
2727                                 dev->flags &= ~MLX4_FLAG_SRIOV;
2728                         }
2729                         if (!mlx4_is_slave(dev))
2730                                 mlx4_free_ownership(dev);
2731                         dev->flags |= MLX4_FLAG_SLAVE;
2732                         dev->flags &= ~MLX4_FLAG_MASTER;
2733                         goto slave_start;
2734                 } else {
2735                         goto err_fw;
2736                 }
2736         }
2737
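             /* With SYS_EQS-capable FW, SR-IOV is enabled only after INIT_HCA.
              * If that changed this function's role (master <-> slave), only
              * the VHCR part of the command interface is re-initialized.
              */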
2738         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2739                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
2740
2741                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
2742                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
2743                         dev->flags = dev_flags;
2744                         err = mlx4_cmd_init(dev);
2745                         if (err) {
2746                                 /* Only the VHCR was cleaned up, so we
2747                                  * could still send FW commands
2748                                  */
2749                                 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
2750                                 goto err_close;
2751                         }
2752                 } else {
2753                         dev->flags = dev_flags;
2754                 }
2755
2756                 if (!SRIOV_VALID_STATE(dev->flags)) {
2757                         mlx4_err(dev, "Invalid SRIOV state\n");
2758                         err = -EINVAL;
2759                         goto err_close;
2760                 }
2760         }
2761
2762         /* Check whether the device is running at its maximum possible speed.
2763          * This call has no return code; it just warns the user when the PCI
2764          * Express capabilities of the device are under-satisfied by the bus.
2765          */
2766         if (!mlx4_is_slave(dev))
2767                 mlx4_check_pcie_caps(dev);
2768
2769         /* Master-specific setup: validate the requested VF distribution
2770          * across the ports, then init the communication channel */
2771         if (mlx4_is_master(dev)) {
2772                 int ib_ports = 0;
2773
2774                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2775                         ib_ports++;
2776
2777                 if (ib_ports &&
2778                     (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
2779                         mlx4_err(dev,
2780                                  "Invalid syntax of num_vfs/probe_vf with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
2781                         err = -EINVAL;
2782                         goto err_close;
2783                 }
2784                 if (dev->caps.num_ports < 2 &&
2785                     num_vfs_argc > 1) {
2786                         err = -EINVAL;
2787                         mlx4_err(dev,
2788                                  "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
2789                                  dev->caps.num_ports);
2790                         goto err_close;
2791                 }
2792                 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
2793
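                     /* Record the port span of each VF: nvfs[0] and nvfs[1]
                      * hold single-port VFs (port 1 resp. port 2), nvfs[2]
                      * holds dual-port VFs spanning all physical ports.
                      */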
2794                 for (i = 0; i < ARRAY_SIZE(dev->persist->nvfs); i++) {
2797                         unsigned j;
2798
2799                         for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
2800                                 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
2801                                 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
2802                                         dev->caps.num_ports;
2803                         }
2804                 }
2805
2806                 /* In master functions, the communication channel
2807                  * must be initialized after obtaining its address from fw
2808                  */
2809                 err = mlx4_multi_func_init(dev);
2810                 if (err) {
2811                         mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
2812                         goto err_close;
2813                 }
2814         }
2815
2816         err = mlx4_alloc_eq_table(dev);
2817         if (err)
2818                 goto err_master_mfunc;
2819
2820         priv->msix_ctl.pool_bm = 0;
2821         mutex_init(&priv->msix_ctl.pool_lock);
2822
2823         mlx4_enable_msi_x(dev);
2824         if (mlx4_is_mfunc(dev) && !(dev->flags & MLX4_FLAG_MSI_X)) {
2826                 err = -ENOSYS;
2827                 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
2828                 goto err_free_eq;
2829         }
2830
2831         if (!mlx4_is_slave(dev)) {
2832                 err = mlx4_init_steering(dev);
2833                 if (err)
2834                         goto err_disable_msix;
2835         }
2836
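             /* If the HCA setup fails with -EBUSY while MSI-X is enabled on a
              * single-function device, fall back to legacy INTx with a single
              * completion vector and retry once.
              */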
2837         err = mlx4_setup_hca(dev);
2838         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
2839             !mlx4_is_mfunc(dev)) {
2840                 dev->flags &= ~MLX4_FLAG_MSI_X;
2841                 dev->caps.num_comp_vectors = 1;
2842                 dev->caps.comp_pool        = 0;
2843                 pci_disable_msix(pdev);
2844                 err = mlx4_setup_hca(dev);
2845         }
2846
2847         if (err)
2848                 goto err_steer;
2849
2850         mlx4_init_quotas(dev);
2851
2852         for (port = 1; port <= dev->caps.num_ports; port++) {
2853                 err = mlx4_init_port_info(dev, port);
2854                 if (err)
2855                         goto err_port;
2856         }
2857
2858         err = mlx4_register_device(dev);
2859         if (err)
2860                 goto err_port;
2861
2862         mlx4_request_modules(dev);
2863
2864         mlx4_sense_init(dev);
2865         mlx4_start_sense(dev);
2866
2867         priv->removed = 0;
2868
2869         if (mlx4_is_master(dev) && dev->persist->num_vfs)
2870                 atomic_dec(&pf_loading);
2871
2872         kfree(dev_cap);
2873         return 0;
2874
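     /* Error unwind: each label below tears down, in reverse order of
      * initialization, everything that was set up before the failure point.
      */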
2875 err_port:
2876         for (--port; port >= 1; --port)
2877                 mlx4_cleanup_port_info(&priv->port[port]);
2878
2879         mlx4_cleanup_counters_table(dev);
2880         mlx4_cleanup_qp_table(dev);
2881         mlx4_cleanup_srq_table(dev);
2882         mlx4_cleanup_cq_table(dev);
2883         mlx4_cmd_use_polling(dev);
2884         mlx4_cleanup_eq_table(dev);
2885         mlx4_cleanup_mcg_table(dev);
2886         mlx4_cleanup_mr_table(dev);
2887         mlx4_cleanup_xrcd_table(dev);
2888         mlx4_cleanup_pd_table(dev);
2889         mlx4_cleanup_uar_table(dev);
2890
2891 err_steer:
2892         if (!mlx4_is_slave(dev))
2893                 mlx4_clear_steering(dev);
2894
2895 err_disable_msix:
2896         if (dev->flags & MLX4_FLAG_MSI_X)
2897                 pci_disable_msix(pdev);
2898
2899 err_free_eq:
2900         mlx4_free_eq_table(dev);
2901
2902 err_master_mfunc:
2903         if (mlx4_is_master(dev))
2904                 mlx4_multi_func_cleanup(dev);
2905
2906         if (mlx4_is_slave(dev)) {
2907                 kfree(dev->caps.qp0_qkey);
2908                 kfree(dev->caps.qp0_tunnel);
2909                 kfree(dev->caps.qp0_proxy);
2910                 kfree(dev->caps.qp1_tunnel);
2911                 kfree(dev->caps.qp1_proxy);
2912         }
2913
2914 err_close:
2915         mlx4_close_hca(dev);
2916
2917 err_fw:
2918         mlx4_close_fw(dev);
2919
2920 err_mfunc:
2921         if (mlx4_is_slave(dev))
2922                 mlx4_multi_func_cleanup(dev);
2923
2924 err_cmd:
2925         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2926
2927 err_sriov:
2928         if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
2929                 pci_disable_sriov(pdev);
2930
2931         if (mlx4_is_master(dev) && dev->persist->num_vfs)
2932                 atomic_dec(&pf_loading);
2933
2934         kfree(priv->dev.dev_vfs);
2935
2936         if (!mlx4_is_slave(dev))
2937                 mlx4_free_ownership(dev);
2938
2939         kfree(dev_cap);
2940         return err;
2941 }
2942
2943 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
2944                            struct mlx4_priv *priv)
2945 {
2946         int err;
2947         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
2948         int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
2949         const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
2950                 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
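             /* param_map translates the position of each num_vfs=/probe_vf=
              * argument to an nvfs[]/prb_vf[] slot: a single argument means
              * dual-port VFs (slot 2); two or three arguments map to port 1,
              * port 2 and port 1+2 respectively.  E.g. num_vfs=5 yields
              * nvfs = {0, 0, 5}, while num_vfs=1,2,3 yields nvfs = {1, 2, 3}.
              */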
2951         unsigned total_vfs = 0;
2952         unsigned int i;
2953
2954         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
2955
2956         err = pci_enable_device(pdev);
2957         if (err) {
2958                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
2959                 return err;
2960         }
2961
2962         /* Since all VFs and the PF are *guaranteed* 2 MACs per port,
2963          * and there are only 128 MACs, we must limit the number of
2964          * VFs to 63.
2965          */
2966         for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
2967              total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
2968                 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
2969                 if (nvfs[i] < 0) {
2970                         dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
2971                         err = -EINVAL;
2972                         goto err_disable_pdev;
2973                 }
2974         }
2975         for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc; i++) {
2977                 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
2978                 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
2979                         dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
2980                         err = -EINVAL;
2981                         goto err_disable_pdev;
2982                 }
2983         }
2984         if (total_vfs >= MLX4_MAX_NUM_VF) {
2985                 dev_err(&pdev->dev,
2986                         "Requested more VFs (%d) than allowed (%d)\n",
2987                         total_vfs, MLX4_MAX_NUM_VF - 1);
2988                 err = -EINVAL;
2989                 goto err_disable_pdev;
2990         }
2991
2992         for (i = 0; i < MLX4_MAX_PORTS; i++) {
2993                 if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
2994                         dev_err(&pdev->dev,
2995                                 "Requested more VFs (%d) for port (%d) than allowed (%d)\n",
2996                                 nvfs[i] + nvfs[2], i + 1,
2997                                 MLX4_MAX_NUM_VF_P_PORT - 1);
2998                         err = -EINVAL;
2999                         goto err_disable_pdev;
3000                 }
3001         }
3002
3003         /* Check for BARs. */
3004         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3005             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3006                 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3007                         pci_dev_data, pci_resource_flags(pdev, 0));
3008                 err = -ENODEV;
3009                 goto err_disable_pdev;
3010         }
3011         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3012                 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3013                 err = -ENODEV;
3014                 goto err_disable_pdev;
3015         }
3016
3017         err = pci_request_regions(pdev, DRV_NAME);
3018         if (err) {
3019                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3020                 goto err_disable_pdev;
3021         }
3022
3023         pci_set_master(pdev);
3024
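             /* Prefer 64-bit DMA masks; fall back to 32-bit if the platform
              * cannot provide them (both the streaming and the coherent mask
              * are set).
              */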
3025         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3026         if (err) {
3027                 dev_warn(&pdev->dev, "Couldn't set 64-bit PCI DMA mask; falling back to 32-bit\n");
3028                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3029                 if (err) {
3030                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3031                         goto err_release_regions;
3032                 }
3033         }
3034         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3035         if (err) {
3036                 dev_warn(&pdev->dev, "Couldn't set 64-bit consistent PCI DMA mask; falling back to 32-bit\n");
3037                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3038                 if (err) {
3039                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3040                         goto err_release_regions;
3041                 }
3042         }
3043
3044         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3045         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3046         /* Detect if this device is a virtual function */
3047         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3048                 /* When acting as PF, we normally skip VFs unless explicitly
3049                  * requested to probe them.
3050                  */
3051                 if (total_vfs) {
3052                         unsigned vfs_offset = 0;
3053
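                             /* Walk nvfs[] to find which VF group this
                              * function number falls into, then probe the VF
                              * only if its index within the group is below the
                              * corresponding prb_vf[] count.
                              */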
3054                         for (i = 0; i < ARRAY_SIZE(nvfs) &&
3055                              vfs_offset + nvfs[i] < extended_func_num(pdev);
3056                              vfs_offset += nvfs[i], i++)
3057                                 ;
3058                         if (i == ARRAY_SIZE(nvfs)) {
3059                                 err = -ENODEV;
3060                                 goto err_release_regions;
3061                         }
3062                         if ((extended_func_num(pdev) - vfs_offset)
3063                             > prb_vf[i]) {
3064                                 dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3065                                          extended_func_num(pdev));
3066                                 err = -ENODEV;
3067                                 goto err_release_regions;
3068                         }
3069                 }
3070         }
3071
3072         err = mlx4_catas_init(&priv->dev);
3073         if (err)
3074                 goto err_release_regions;
3075
3076         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3077         if (err)
3078                 goto err_catas;
3079
3080         return 0;
3081
3082 err_catas:
3083         mlx4_catas_end(&priv->dev);
3084
3085 err_release_regions:
3086         pci_release_regions(pdev);
3087
3088 err_disable_pdev:
3089         pci_disable_device(pdev);
3090         pci_set_drvdata(pdev, NULL);
3091         return err;
3092 }
3093
3094 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3095 {
3096         struct mlx4_priv *priv;
3097         struct mlx4_dev *dev;
3098         int ret;
3099
3100         printk_once(KERN_INFO "%s", mlx4_version);
3101
3102         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3103         if (!priv)
3104                 return -ENOMEM;
3105
3106         dev       = &priv->dev;
3107         dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3108         if (!dev->persist) {
3109                 kfree(priv);
3110                 return -ENOMEM;
3111         }
3112         dev->persist->pdev = pdev;
3113         dev->persist->dev = dev;
3114         pci_set_drvdata(pdev, dev->persist);
3115         priv->pci_dev_data = id->driver_data;
3116         mutex_init(&dev->persist->device_state_mutex);
3117
3118         ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3119         if (ret) {
3120                 kfree(dev->persist);
3121                 kfree(priv);
3122         }
3123         return ret;
3124 }
3125
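     /* Reset the driver-private state while preserving dev->persist, which
      * must survive so the device can be reset and reloaded after a
      * catastrophic error.
      */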
3126 static void mlx4_clean_dev(struct mlx4_dev *dev)
3127 {
3128         struct mlx4_dev_persistent *persist = dev->persist;
3129         struct mlx4_priv *priv = mlx4_priv(dev);
3130
3131         memset(priv, 0, sizeof(*priv));
3132         priv->dev.persist = persist;
3133 }
3134
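     /* Tear down everything set up by mlx4_load_one().  The PCI device stays
      * enabled and the persistent state stays allocated, so the device can be
      * reloaded later (see mlx4_restart_one() and the catas reset flow).
      */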
3135 static void mlx4_unload_one(struct pci_dev *pdev)
3136 {
3137         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3138         struct mlx4_dev  *dev  = persist->dev;
3139         struct mlx4_priv *priv = mlx4_priv(dev);
3140         int               pci_dev_data;
3141         int p, i;
3142         int active_vfs = 0;
3143
3144         if (priv->removed)
3145                 return;
3146
3147         /* Save the current port types so they can be restored on reload */
3148         for (i = 0; i < dev->caps.num_ports; i++) {
3149                 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3150                 dev->persist->curr_port_poss_type[i] =
3151                         dev->caps.possible_type[i + 1];
3152         }
3153
3154         pci_dev_data = priv->pci_dev_data;
3155
3156         /* Disabling SR-IOV is not allowed while there are active VFs */
3157         if (mlx4_is_master(dev)) {
3158                 active_vfs = mlx4_how_many_lives_vf(dev);
3159                 if (active_vfs) {
3160                         pr_warn("Removing PF while there are active VFs!\n");
3161                         pr_warn("Will not disable SR-IOV.\n");
3162                 }
3163         }
3164         mlx4_stop_sense(dev);
3165         mlx4_unregister_device(dev);
3166
3167         for (p = 1; p <= dev->caps.num_ports; p++) {
3168                 mlx4_cleanup_port_info(&priv->port[p]);
3169                 mlx4_CLOSE_PORT(dev, p);
3170         }
3171
3172         if (mlx4_is_master(dev))
3173                 mlx4_free_resource_tracker(dev,
3174                                            RES_TR_FREE_SLAVES_ONLY);
3175
3176         mlx4_cleanup_counters_table(dev);
3177         mlx4_cleanup_qp_table(dev);
3178         mlx4_cleanup_srq_table(dev);
3179         mlx4_cleanup_cq_table(dev);
3180         mlx4_cmd_use_polling(dev);
3181         mlx4_cleanup_eq_table(dev);
3182         mlx4_cleanup_mcg_table(dev);
3183         mlx4_cleanup_mr_table(dev);
3184         mlx4_cleanup_xrcd_table(dev);
3185         mlx4_cleanup_pd_table(dev);
3186
3187         if (mlx4_is_master(dev))
3188                 mlx4_free_resource_tracker(dev,
3189                                            RES_TR_FREE_STRUCTS_ONLY);
3190
3191         iounmap(priv->kar);
3192         mlx4_uar_free(dev, &priv->driver_uar);
3193         mlx4_cleanup_uar_table(dev);
3194         if (!mlx4_is_slave(dev))
3195                 mlx4_clear_steering(dev);
3196         mlx4_free_eq_table(dev);
3197         if (mlx4_is_master(dev))
3198                 mlx4_multi_func_cleanup(dev);
3199         mlx4_close_hca(dev);
3200         mlx4_close_fw(dev);
3201         if (mlx4_is_slave(dev))
3202                 mlx4_multi_func_cleanup(dev);
3203         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3204
3205         if (dev->flags & MLX4_FLAG_MSI_X)
3206                 pci_disable_msix(pdev);
3207         if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
3208                 mlx4_warn(dev, "Disabling SR-IOV\n");
3209                 pci_disable_sriov(pdev);
3210                 dev->flags &= ~MLX4_FLAG_SRIOV;
3211                 dev->persist->num_vfs = 0;
3212         }
3213
3214         if (!mlx4_is_slave(dev))
3215                 mlx4_free_ownership(dev);
3216
3217         kfree(dev->caps.qp0_qkey);
3218         kfree(dev->caps.qp0_tunnel);
3219         kfree(dev->caps.qp0_proxy);
3220         kfree(dev->caps.qp1_tunnel);
3221         kfree(dev->caps.qp1_proxy);
3222         kfree(dev->dev_vfs);
3223
3224         mlx4_clean_dev(dev);
3225         priv->pci_dev_data = pci_dev_data;
3226         priv->removed = 1;
3227 }
3228
3229 static void mlx4_remove_one(struct pci_dev *pdev)
3230 {
3231         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3232         struct mlx4_dev  *dev  = persist->dev;
3233         struct mlx4_priv *priv = mlx4_priv(dev);
3234
3235         mlx4_unload_one(pdev);
3236         mlx4_catas_end(dev);
3237         pci_release_regions(pdev);
3238         pci_disable_device(pdev);
3239         kfree(dev->persist);
3240         kfree(priv);
3241         pci_set_drvdata(pdev, NULL);
3242 }
3243
3244 static int restore_current_port_types(struct mlx4_dev *dev,
3245                                       enum mlx4_port_type *types,
3246                                       enum mlx4_port_type *poss_types)
3247 {
3248         struct mlx4_priv *priv = mlx4_priv(dev);
3249         int err, i;
3250
3251         mlx4_stop_sense(dev);
3252
3253         mutex_lock(&priv->port_mutex);
3254         for (i = 0; i < dev->caps.num_ports; i++)
3255                 dev->caps.possible_type[i + 1] = poss_types[i];
3256         err = mlx4_change_port_types(dev, types);
3257         mlx4_start_sense(dev);
3258         mutex_unlock(&priv->port_mutex);
3259
3260         return err;
3261 }
3262
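     /* Reload the driver on the same PCI device, e.g. from the catas reset
      * flow: unload, load again, then restore the port types saved by
      * mlx4_unload_one().
      */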
3263 int mlx4_restart_one(struct pci_dev *pdev)
3264 {
3265         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3266         struct mlx4_dev  *dev  = persist->dev;
3267         struct mlx4_priv *priv = mlx4_priv(dev);
3268         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3269         int pci_dev_data, err, total_vfs;
3270
3271         pci_dev_data = priv->pci_dev_data;
3272         total_vfs = dev->persist->num_vfs;
3273         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3274
3275         mlx4_unload_one(pdev);
3276         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3277         if (err) {
3278                 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3279                          __func__, pci_name(pdev), err);
3280                 return err;
3281         }
3282
3283         err = restore_current_port_types(dev, dev->persist->curr_port_type,
3284                                          dev->persist->curr_port_poss_type);
3285         if (err)
3286                 mlx4_err(dev, "could not restore original port types (%d)\n",
3287                          err);
3288
3289         return err;
3290 }
3291
3292 static const struct pci_device_id mlx4_pci_table[] = {
3293         /* MT25408 "Hermon" SDR */
3294         { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3295         /* MT25408 "Hermon" DDR */
3296         { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3297         /* MT25408 "Hermon" QDR */
3298         { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3299         /* MT25408 "Hermon" DDR PCIe gen2 */
3300         { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3301         /* MT25408 "Hermon" QDR PCIe gen2 */
3302         { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3303         /* MT25408 "Hermon" EN 10GigE */
3304         { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3305         /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3306         { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3307         /* MT25458 ConnectX EN 10GBASE-T 10GigE */
3308         { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3309         /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3310         { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3311         /* MT26468 ConnectX EN 10GigE PCIe gen2*/
3312         { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3313         /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3314         { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3315         /* MT26478 ConnectX2 40GigE PCIe gen2 */
3316         { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3317         /* MT25400 Family [ConnectX-2 Virtual Function] */
3318         { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3319         /* MT27500 Family [ConnectX-3] */
3320         { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3321         /* MT27500 Family [ConnectX-3 Virtual Function] */
3322         { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3323         { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3324         { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3325         { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3326         { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3327         { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3328         { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3329         { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3330         { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3331         { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3332         { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3333         { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3334         { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3335         { 0, }
3336 };
3337
3338 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3339
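     /* PCI error recovery: on a channel error, unload the device; on a
      * subsequent slot reset, run the full init path again to recover it.
      */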
3340 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3341                                               pci_channel_state_t state)
3342 {
3343         mlx4_unload_one(pdev);
3344
3345         return state == pci_channel_io_perm_failure ?
3346                 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
3347 }
3348
3349 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3350 {
3351         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3352         struct mlx4_priv *priv = mlx4_priv(persist->dev);
3353         int               ret;
3354
3355         ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv);
3356
3357         return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3358 }
3359
3360 static const struct pci_error_handlers mlx4_err_handler = {
3361         .error_detected = mlx4_pci_err_detected,
3362         .slot_reset     = mlx4_pci_slot_reset,
3363 };
3364
3365 static struct pci_driver mlx4_driver = {
3366         .name           = DRV_NAME,
3367         .id_table       = mlx4_pci_table,
3368         .probe          = mlx4_init_one,
3369         .shutdown       = mlx4_unload_one,
3370         .remove         = mlx4_remove_one,
3371         .err_handler    = &mlx4_err_handler,
3372 };
3373
3374 static int __init mlx4_verify_params(void)
3375 {
3376         if ((log_num_mac < 0) || (log_num_mac > 7)) {
3377                 pr_warn("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
3378                 return -1;
3379         }
3380
3381         if (log_num_vlan != 0)
3382                 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3383                         MLX4_LOG_NUM_VLANS);
3384
3385         if (use_prio != 0)
3386                 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
3387
3388         if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
3389                 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
3390                         log_mtts_per_seg);
3391                 return -1;
3392         }
3393
3394         /* Check that the port type module parameter is a legal combination */
3395         if (!port_type_array[0] && port_type_array[1]) {
3396                 pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
3397                 port_type_array[0] = true;
3398         }
3399
3400         if (mlx4_log_num_mgm_entry_size < -7 ||
3401             (mlx4_log_num_mgm_entry_size > 0 &&
3402              (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3403               mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
3404                 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
3405                         mlx4_log_num_mgm_entry_size,
3406                         MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3407                         MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3408                 return -1;
3409         }
3410
3411         return 0;
3412 }
3413
3414 static int __init mlx4_init(void)
3415 {
3416         int ret;
3417
3418         if (mlx4_verify_params())
3419                 return -EINVAL;
3420
3422         mlx4_wq = create_singlethread_workqueue("mlx4");
3423         if (!mlx4_wq)
3424                 return -ENOMEM;
3425
3426         ret = pci_register_driver(&mlx4_driver);
3427         if (ret < 0)
3428                 destroy_workqueue(mlx4_wq);
3429         return ret < 0 ? ret : 0;
3430 }
3431
3432 static void __exit mlx4_cleanup(void)
3433 {
3434         pci_unregister_driver(&mlx4_driver);
3435         destroy_workqueue(mlx4_wq);
3436 }
3437
3438 module_init(mlx4_init);
3439 module_exit(mlx4_cleanup);