drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
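/* VFs are enabled through the standard PCI sysfs interface instead, e.g.:
 *   echo 4 > /sys/bus/pci/devices/<BDF>/sriov_numvfs
 */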
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
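/* Ring-memory helpers: allocate and free the DMA-coherent buffer
 * (len * entry_size bytes) that backs a be_queue_info ring.
 */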
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
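/* Enable or disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register in PCI config space.
 */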
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
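/* Doorbell helpers: the be_*_notify() routines below write to the adapter's
 * doorbell BAR (adapter->db) to tell HW how many RQ/TXQ entries were posted,
 * or how many EQ/CQ entries were consumed and whether to re-arm the queue.
 */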
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279                                      mac)) {
280                         /* mac already added, skip addition */
281                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282                         return 0;
283                 }
284         }
285
286         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287                                &adapter->pmac_id[0], 0);
288 }
289
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292         int i;
293
294         /* Skip deletion if the programmed mac is
295          * being used in uc-list
296          */
297         for (i = 0; i < adapter->uc_macs; i++) {
298                 if (adapter->pmac_id[i + 1] == pmac_id)
299                         return;
300         }
301         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306         struct be_adapter *adapter = netdev_priv(netdev);
307         struct device *dev = &adapter->pdev->dev;
308         struct sockaddr *addr = p;
309         int status;
310         u8 mac[ETH_ALEN];
311         u32 old_pmac_id = adapter->pmac_id[0];
312
313         if (!is_valid_ether_addr(addr->sa_data))
314                 return -EADDRNOTAVAIL;
315
316         /* Proceed further only if the user-provided MAC is different
317          * from the active MAC
318          */
319         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320                 return 0;
321
322         /* if device is not running, copy MAC to netdev->dev_addr */
323         if (!netif_running(netdev))
324                 goto done;
325
326         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327          * privilege or if PF did not provision the new MAC address.
328          * On BE3, this cmd will always fail if the VF doesn't have the
329          * FILTMGMT privilege. This failure is OK only if the PF has
330          * already programmed the MAC for the VF.
331          */
332         mutex_lock(&adapter->rx_filter_lock);
333         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334         if (!status) {
335
336                 /* Delete the old programmed MAC. This call may fail if the
337                  * old MAC was already deleted by the PF driver.
338                  */
339                 if (adapter->pmac_id[0] != old_pmac_id)
340                         be_dev_mac_del(adapter, old_pmac_id);
341         }
342
343         mutex_unlock(&adapter->rx_filter_lock);
344         /* Decide if the new MAC is successfully activated only after
345          * querying the FW
346          */
347         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348                                        adapter->if_handle, true, 0);
349         if (status)
350                 goto err;
351
352         /* The MAC change did not happen, either due to lack of privilege
353          * or because the PF didn't pre-provision the MAC.
354          */
355         if (!ether_addr_equal(addr->sa_data, mac)) {
356                 status = -EPERM;
357                 goto err;
358         }
359 done:
360         ether_addr_copy(adapter->dev_mac, addr->sa_data);
361         ether_addr_copy(netdev->dev_addr, addr->sa_data);
362         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363         return 0;
364 err:
365         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366         return status;
367 }
368
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372         if (BE2_chip(adapter)) {
373                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374
375                 return &cmd->hw_stats;
376         } else if (BE3_chip(adapter)) {
377                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378
379                 return &cmd->hw_stats;
380         } else {
381                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         }
385 }
386
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390         if (BE2_chip(adapter)) {
391                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392
393                 return &hw_stats->erx;
394         } else if (BE3_chip(adapter)) {
395                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396
397                 return &hw_stats->erx;
398         } else {
399                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         }
403 }
404
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410         struct be_port_rxf_stats_v0 *port_stats =
411                                         &rxf_stats->port[adapter->port_num];
412         struct be_drv_stats *drvs = &adapter->drv_stats;
413
414         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415         drvs->rx_pause_frames = port_stats->rx_pause_frames;
416         drvs->rx_crc_errors = port_stats->rx_crc_errors;
417         drvs->rx_control_frames = port_stats->rx_control_frames;
418         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430         drvs->rx_dropped_header_too_small =
431                 port_stats->rx_dropped_header_too_small;
432         drvs->rx_address_filtered =
433                                         port_stats->rx_address_filtered +
434                                         port_stats->rx_vlan_filtered;
435         drvs->rx_alignment_symbol_errors =
436                 port_stats->rx_alignment_symbol_errors;
437
438         drvs->tx_pauseframes = port_stats->tx_pauseframes;
439         drvs->tx_controlframes = port_stats->tx_controlframes;
440
441         if (adapter->port_num)
442                 drvs->jabber_events = rxf_stats->port1_jabber_events;
443         else
444                 drvs->jabber_events = rxf_stats->port0_jabber_events;
445         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447         drvs->forwarded_packets = rxf_stats->forwarded_packets;
448         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459         struct be_port_rxf_stats_v1 *port_stats =
460                                         &rxf_stats->port[adapter->port_num];
461         struct be_drv_stats *drvs = &adapter->drv_stats;
462
463         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466         drvs->rx_pause_frames = port_stats->rx_pause_frames;
467         drvs->rx_crc_errors = port_stats->rx_crc_errors;
468         drvs->rx_control_frames = port_stats->rx_control_frames;
469         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479         drvs->rx_dropped_header_too_small =
480                 port_stats->rx_dropped_header_too_small;
481         drvs->rx_input_fifo_overflow_drop =
482                 port_stats->rx_input_fifo_overflow_drop;
483         drvs->rx_address_filtered = port_stats->rx_address_filtered;
484         drvs->rx_alignment_symbol_errors =
485                 port_stats->rx_alignment_symbol_errors;
486         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487         drvs->tx_pauseframes = port_stats->tx_pauseframes;
488         drvs->tx_controlframes = port_stats->tx_controlframes;
489         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490         drvs->jabber_events = port_stats->jabber_events;
491         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493         drvs->forwarded_packets = rxf_stats->forwarded_packets;
494         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505         struct be_port_rxf_stats_v2 *port_stats =
506                                         &rxf_stats->port[adapter->port_num];
507         struct be_drv_stats *drvs = &adapter->drv_stats;
508
509         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512         drvs->rx_pause_frames = port_stats->rx_pause_frames;
513         drvs->rx_crc_errors = port_stats->rx_crc_errors;
514         drvs->rx_control_frames = port_stats->rx_control_frames;
515         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525         drvs->rx_dropped_header_too_small =
526                 port_stats->rx_dropped_header_too_small;
527         drvs->rx_input_fifo_overflow_drop =
528                 port_stats->rx_input_fifo_overflow_drop;
529         drvs->rx_address_filtered = port_stats->rx_address_filtered;
530         drvs->rx_alignment_symbol_errors =
531                 port_stats->rx_alignment_symbol_errors;
532         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533         drvs->tx_pauseframes = port_stats->tx_pauseframes;
534         drvs->tx_controlframes = port_stats->tx_controlframes;
535         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536         drvs->jabber_events = port_stats->jabber_events;
537         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539         drvs->forwarded_packets = rxf_stats->forwarded_packets;
540         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544         if (be_roce_supported(adapter)) {
545                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547                 drvs->rx_roce_frames = port_stats->roce_frames_received;
548                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549                 drvs->roce_drops_payload_len =
550                         port_stats->roce_drops_payload_len;
551         }
552 }
553
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556         struct be_drv_stats *drvs = &adapter->drv_stats;
557         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558
559         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569         drvs->rx_dropped_tcp_length =
570                                 pport_stats->rx_dropped_invalid_tcp_length;
571         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574         drvs->rx_dropped_header_too_small =
575                                 pport_stats->rx_dropped_header_too_small;
576         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577         drvs->rx_address_filtered =
578                                         pport_stats->rx_address_filtered +
579                                         pport_stats->rx_vlan_filtered;
580         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584         drvs->jabber_events = pport_stats->rx_jabbers;
585         drvs->forwarded_packets = pport_stats->num_forwards_lo;
586         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587         drvs->rx_drops_too_many_frags =
588                                 pport_stats->rx_drops_too_many_frags_lo;
589 }
590
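/* Accumulate a 16-bit HW counter, which wraps at 65535, into a 32-bit
 * driver counter, assuming at most one wrap since the previous read.
 */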
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)                   (x & 0xFFFF)
594 #define hi(x)                   (x & 0xFFFF0000)
595         bool wrapped = val < lo(*acc);
596         u32 newacc = hi(*acc) + val;
597
598         if (wrapped)
599                 newacc += 65536;
600         ACCESS_ONCE(*acc) = newacc;
601 }
602
603 static void populate_erx_stats(struct be_adapter *adapter,
604                                struct be_rx_obj *rxo, u32 erx_stat)
605 {
606         if (!BEx_chip(adapter))
607                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608         else
609                 /* The erx HW counter below can wrap around after 65535;
610                  * the driver accumulates it into a 32-bit value.
611                  */
612                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613                                      (u16)erx_stat);
614 }
615
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619         struct be_rx_obj *rxo;
620         int i;
621         u32 erx_stat;
622
623         if (lancer_chip(adapter)) {
624                 populate_lancer_stats(adapter);
625         } else {
626                 if (BE2_chip(adapter))
627                         populate_be_v0_stats(adapter);
628                 else if (BE3_chip(adapter))
629                         /* for BE3 */
630                         populate_be_v1_stats(adapter);
631                 else
632                         populate_be_v2_stats(adapter);
633
634                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635                 for_all_rx_queues(adapter, rxo, i) {
636                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637                         populate_erx_stats(adapter, rxo, erx_stat);
638                 }
639         }
640 }
641
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643                                                 struct rtnl_link_stats64 *stats)
644 {
645         struct be_adapter *adapter = netdev_priv(netdev);
646         struct be_drv_stats *drvs = &adapter->drv_stats;
647         struct be_rx_obj *rxo;
648         struct be_tx_obj *txo;
649         u64 pkts, bytes;
650         unsigned int start;
651         int i;
652
653         for_all_rx_queues(adapter, rxo, i) {
654                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
655
656                 do {
657                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658                         pkts = rx_stats(rxo)->rx_pkts;
659                         bytes = rx_stats(rxo)->rx_bytes;
660                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661                 stats->rx_packets += pkts;
662                 stats->rx_bytes += bytes;
663                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665                                         rx_stats(rxo)->rx_drops_no_frags;
666         }
667
668         for_all_tx_queues(adapter, txo, i) {
669                 const struct be_tx_stats *tx_stats = tx_stats(txo);
670
671                 do {
672                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673                         pkts = tx_stats(txo)->tx_pkts;
674                         bytes = tx_stats(txo)->tx_bytes;
675                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676                 stats->tx_packets += pkts;
677                 stats->tx_bytes += bytes;
678         }
679
680         /* bad pkts received */
681         stats->rx_errors = drvs->rx_crc_errors +
682                 drvs->rx_alignment_symbol_errors +
683                 drvs->rx_in_range_errors +
684                 drvs->rx_out_range_errors +
685                 drvs->rx_frame_too_long +
686                 drvs->rx_dropped_too_small +
687                 drvs->rx_dropped_too_short +
688                 drvs->rx_dropped_header_too_small +
689                 drvs->rx_dropped_tcp_length +
690                 drvs->rx_dropped_runt;
691
692         /* detailed rx errors */
693         stats->rx_length_errors = drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long;
696
697         stats->rx_crc_errors = drvs->rx_crc_errors;
698
699         /* frame alignment errors */
700         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701
702         /* receiver fifo overrun */
703         /* drops_no_pbuf is not per i/f, it's per BE card */
704         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705                                 drvs->rx_input_fifo_overflow_drop +
706                                 drvs->rx_drops_no_pbuf;
707         return stats;
708 }
709
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712         struct net_device *netdev = adapter->netdev;
713
714         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715                 netif_carrier_off(netdev);
716                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717         }
718
719         if (link_status)
720                 netif_carrier_on(netdev);
721         else
722                 netif_carrier_off(netdev);
723
724         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726
727 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
728 {
729         struct be_tx_stats *stats = tx_stats(txo);
730         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
731
732         u64_stats_update_begin(&stats->sync);
733         stats->tx_reqs++;
734         stats->tx_bytes += skb->len;
735         stats->tx_pkts += tx_pkts;
736         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
737                 stats->tx_vxlan_offload_pkts += tx_pkts;
738         u64_stats_update_end(&stats->sync);
739 }
740
741 /* Returns number of WRBs needed for the skb */
742 static u32 skb_wrb_cnt(struct sk_buff *skb)
743 {
744         /* +1 for the header wrb */
745         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
746 }
747
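/* Fill a WRB (work request block) with the DMA address and length of one
 * TX buffer fragment.
 */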
748 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
749 {
750         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
751         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
752         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
753         wrb->rsvd0 = 0;
754 }
755
756 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
757  * to avoid the swap and shift/mask operations in wrb_fill().
758  */
759 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
760 {
761         wrb->frag_pa_hi = 0;
762         wrb->frag_pa_lo = 0;
763         wrb->frag_len = 0;
764         wrb->rsvd0 = 0;
765 }
766
767 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
768                                      struct sk_buff *skb)
769 {
770         u8 vlan_prio;
771         u16 vlan_tag;
772
773         vlan_tag = skb_vlan_tag_get(skb);
774         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
775         /* If vlan priority provided by OS is NOT in available bmap */
776         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
777                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
778                                 adapter->recommended_prio_bits;
779
780         return vlan_tag;
781 }
782
783 /* Used only for IP tunnel packets */
784 static u16 skb_inner_ip_proto(struct sk_buff *skb)
785 {
786         return (inner_ip_hdr(skb)->version == 4) ?
787                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
788 }
789
790 static u16 skb_ip_proto(struct sk_buff *skb)
791 {
792         return (ip_hdr(skb)->version == 4) ?
793                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
794 }
795
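/* TX queue occupancy checks used for flow control: the subqueue is stopped
 * when a worst-case fragment count would no longer fit, and may be woken
 * once usage drops below half the queue length.
 */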
796 static inline bool be_is_txq_full(struct be_tx_obj *txo)
797 {
798         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
799 }
800
801 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
802 {
803         return atomic_read(&txo->q.used) < txo->q.len / 2;
804 }
805
806 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
807 {
808         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
809 }
810
811 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
812                                        struct sk_buff *skb,
813                                        struct be_wrb_params *wrb_params)
814 {
815         u16 proto;
816
817         if (skb_is_gso(skb)) {
818                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
819                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
820                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
821                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
822         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
823                 if (skb->encapsulation) {
824                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
825                         proto = skb_inner_ip_proto(skb);
826                 } else {
827                         proto = skb_ip_proto(skb);
828                 }
829                 if (proto == IPPROTO_TCP)
830                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
831                 else if (proto == IPPROTO_UDP)
832                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
833         }
834
835         if (skb_vlan_tag_present(skb)) {
836                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
837                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
838         }
839
840         BE_WRB_F_SET(wrb_params->features, CRC, 1);
841 }
842
843 static void wrb_fill_hdr(struct be_adapter *adapter,
844                          struct be_eth_hdr_wrb *hdr,
845                          struct be_wrb_params *wrb_params,
846                          struct sk_buff *skb)
847 {
848         memset(hdr, 0, sizeof(*hdr));
849
850         SET_TX_WRB_HDR_BITS(crc, hdr,
851                             BE_WRB_F_GET(wrb_params->features, CRC));
852         SET_TX_WRB_HDR_BITS(ipcs, hdr,
853                             BE_WRB_F_GET(wrb_params->features, IPCS));
854         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
855                             BE_WRB_F_GET(wrb_params->features, TCPCS));
856         SET_TX_WRB_HDR_BITS(udpcs, hdr,
857                             BE_WRB_F_GET(wrb_params->features, UDPCS));
858
859         SET_TX_WRB_HDR_BITS(lso, hdr,
860                             BE_WRB_F_GET(wrb_params->features, LSO));
861         SET_TX_WRB_HDR_BITS(lso6, hdr,
862                             BE_WRB_F_GET(wrb_params->features, LSO6));
863         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
864
865         /* The hack to skip HW VLAN tagging requires evt = 1, compl = 0. When
866          * this hack is not needed, the evt bit is set while ringing the DB.
867          */
868         SET_TX_WRB_HDR_BITS(event, hdr,
869                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
870         SET_TX_WRB_HDR_BITS(vlan, hdr,
871                             BE_WRB_F_GET(wrb_params->features, VLAN));
872         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
873
874         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
875         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
876         SET_TX_WRB_HDR_BITS(mgmt, hdr,
877                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
878 }
879
880 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
881                           bool unmap_single)
882 {
883         dma_addr_t dma;
884         u32 frag_len = le32_to_cpu(wrb->frag_len);
885
886
887         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
888                 (u64)le32_to_cpu(wrb->frag_pa_lo);
889         if (frag_len) {
890                 if (unmap_single)
891                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
892                 else
893                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
894         }
895 }
896
897 /* Grab a WRB header for xmit */
898 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
899 {
900         u32 head = txo->q.head;
901
902         queue_head_inc(&txo->q);
903         return head;
904 }
905
906 /* Set up the WRB header for xmit */
907 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
908                                 struct be_tx_obj *txo,
909                                 struct be_wrb_params *wrb_params,
910                                 struct sk_buff *skb, u16 head)
911 {
912         u32 num_frags = skb_wrb_cnt(skb);
913         struct be_queue_info *txq = &txo->q;
914         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
915
916         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
917         be_dws_cpu_to_le(hdr, sizeof(*hdr));
918
919         BUG_ON(txo->sent_skb_list[head]);
920         txo->sent_skb_list[head] = skb;
921         txo->last_req_hdr = head;
922         atomic_add(num_frags, &txq->used);
923         txo->last_req_wrb_cnt = num_frags;
924         txo->pend_wrb_cnt += num_frags;
925 }
926
927 /* Setup a WRB fragment (buffer descriptor) for xmit */
928 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
929                                  int len)
930 {
931         struct be_eth_wrb *wrb;
932         struct be_queue_info *txq = &txo->q;
933
934         wrb = queue_head_node(txq);
935         wrb_fill(wrb, busaddr, len);
936         queue_head_inc(txq);
937 }
938
939 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
940  * was invoked. The producer index is restored to the previous packet and the
941  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
942  */
943 static void be_xmit_restore(struct be_adapter *adapter,
944                             struct be_tx_obj *txo, u32 head, bool map_single,
945                             u32 copied)
946 {
947         struct device *dev;
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         dev = &adapter->pdev->dev;
952         txq->head = head;
953
954         /* skip the first wrb (hdr); it's not mapped */
955         queue_head_inc(txq);
956         while (copied) {
957                 wrb = queue_head_node(txq);
958                 unmap_tx_frag(dev, wrb, map_single);
959                 map_single = false;
960                 copied -= le32_to_cpu(wrb->frag_len);
961                 queue_head_inc(txq);
962         }
963
964         txq->head = head;
965 }
966
967 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
968  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
969  * of WRBs used up by the packet.
970  */
971 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
972                            struct sk_buff *skb,
973                            struct be_wrb_params *wrb_params)
974 {
975         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
976         struct device *dev = &adapter->pdev->dev;
977         struct be_queue_info *txq = &txo->q;
978         bool map_single = false;
979         u32 head = txq->head;
980         dma_addr_t busaddr;
981         int len;
982
983         head = be_tx_get_wrb_hdr(txo);
984
985         if (skb->len > skb->data_len) {
986                 len = skb_headlen(skb);
987
988                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
989                 if (dma_mapping_error(dev, busaddr))
990                         goto dma_err;
991                 map_single = true;
992                 be_tx_setup_wrb_frag(txo, busaddr, len);
993                 copied += len;
994         }
995
996         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
997                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
998                 len = skb_frag_size(frag);
999
1000                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1001                 if (dma_mapping_error(dev, busaddr))
1002                         goto dma_err;
1003                 be_tx_setup_wrb_frag(txo, busaddr, len);
1004                 copied += len;
1005         }
1006
1007         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1008
1009         be_tx_stats_update(txo, skb);
1010         return wrb_cnt;
1011
1012 dma_err:
1013         adapter->drv_stats.dma_map_errors++;
1014         be_xmit_restore(adapter, txo, head, map_single, copied);
1015         return 0;
1016 }
1017
1018 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1019 {
1020         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1021 }
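/* Insert the VLAN tag (and the outer QnQ tag, if configured) directly into
 * the packet data in software, for cases where HW VLAN tagging must be
 * skipped.
 */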
1022
1023 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1024                                              struct sk_buff *skb,
1025                                              struct be_wrb_params
1026                                              *wrb_params)
1027 {
1028         u16 vlan_tag = 0;
1029
1030         skb = skb_share_check(skb, GFP_ATOMIC);
1031         if (unlikely(!skb))
1032                 return skb;
1033
1034         if (skb_vlan_tag_present(skb))
1035                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1036
1037         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1038                 if (!vlan_tag)
1039                         vlan_tag = adapter->pvid;
1040                 /* As an f/w workaround, set skip_hw_vlan = 1 to inform
1041                  * the F/W to skip VLAN insertion.
1042                  */
1043                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1044         }
1045
1046         if (vlan_tag) {
1047                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1048                                                 vlan_tag);
1049                 if (unlikely(!skb))
1050                         return skb;
1051                 skb->vlan_tci = 0;
1052         }
1053
1054         /* Insert the outer VLAN, if any */
1055         if (adapter->qnq_vid) {
1056                 vlan_tag = adapter->qnq_vid;
1057                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058                                                 vlan_tag);
1059                 if (unlikely(!skb))
1060                         return skb;
1061                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062         }
1063
1064         return skb;
1065 }
1066
1067 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1068 {
1069         struct ethhdr *eh = (struct ethhdr *)skb->data;
1070         u16 offset = ETH_HLEN;
1071
1072         if (eh->h_proto == htons(ETH_P_IPV6)) {
1073                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1074
1075                 offset += sizeof(struct ipv6hdr);
1076                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1077                     ip6h->nexthdr != NEXTHDR_UDP) {
1078                         struct ipv6_opt_hdr *ehdr =
1079                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1080
1081                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1082                         if (ehdr->hdrlen == 0xff)
1083                                 return true;
1084                 }
1085         }
1086         return false;
1087 }
1088
1089 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1090 {
1091         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1092 }
1093
1094 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1095 {
1096         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1097 }
1098
1099 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1100                                                   struct sk_buff *skb,
1101                                                   struct be_wrb_params
1102                                                   *wrb_params)
1103 {
1104         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1105         unsigned int eth_hdr_len;
1106         struct iphdr *ip;
1107
1108         /* For padded packets, BE HW modifies tot_len field in IP header
1109          * incorrecly when VLAN tag is inserted by HW.
1110          * For padded packets, Lancer computes incorrect checksum.
1111          */
1112         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1113                                                 VLAN_ETH_HLEN : ETH_HLEN;
1114         if (skb->len <= 60 &&
1115             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1116             is_ipv4_pkt(skb)) {
1117                 ip = (struct iphdr *)ip_hdr(skb);
1118                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1119         }
1120
1121         /* If vlan tag is already inlined in the packet, skip HW VLAN
1122          * tagging in pvid-tagging mode
1123          */
1124         if (be_pvid_tagging_enabled(adapter) &&
1125             veh->h_vlan_proto == htons(ETH_P_8021Q))
1126                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1127
1128         /* HW has a bug wherein it will calculate CSUM for VLAN
1129          * pkts even though CSUM offload is disabled.
1130          * Manually insert the VLAN in the pkt.
1131          */
1132         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1133             skb_vlan_tag_present(skb)) {
1134                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1135                 if (unlikely(!skb))
1136                         goto err;
1137         }
1138
1139         /* HW may lockup when VLAN HW tagging is requested on
1140          * certain ipv6 packets. Drop such pkts if the HW workaround to
1141          * skip HW tagging is not enabled by FW.
1142          */
1143         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1144                      (adapter->pvid || adapter->qnq_vid) &&
1145                      !qnq_async_evt_rcvd(adapter)))
1146                 goto tx_drop;
1147
1148         /* Manual VLAN tag insertion to prevent:
1149          * ASIC lockup when the ASIC inserts VLAN tag into
1150          * certain ipv6 packets. Insert VLAN tags in driver,
1151          * and set event, completion, vlan bits accordingly
1152          * in the Tx WRB.
1153          */
1154         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1155             be_vlan_tag_tx_chk(adapter, skb)) {
1156                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1157                 if (unlikely(!skb))
1158                         goto err;
1159         }
1160
1161         return skb;
1162 tx_drop:
1163         dev_kfree_skb_any(skb);
1164 err:
1165         return NULL;
1166 }
1167
1168 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1169                                            struct sk_buff *skb,
1170                                            struct be_wrb_params *wrb_params)
1171 {
1172         int err;
1173
1174         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1175          * packets that are 32 bytes or less may cause a transmit stall
1176          * on that port. The workaround is to pad such packets
1177          * (len <= 32 bytes) to a minimum length of 36 bytes.
1178          */
1179         if (skb->len <= 32) {
1180                 if (skb_put_padto(skb, 36))
1181                         return NULL;
1182         }
1183
1184         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1185                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1186                 if (!skb)
1187                         return NULL;
1188         }
1189
1190         /* The stack can send us skbs with length greater than
1191          * what the HW can handle. Trim the extra bytes.
1192          */
1193         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1194         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1195         WARN_ON(err);
1196
1197         return skb;
1198 }
1199
1200 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1201 {
1202         struct be_queue_info *txq = &txo->q;
1203         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1204
1205         /* Mark the last request eventable if it hasn't been marked already */
1206         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1207                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1208
1209         /* compose a dummy wrb if there are odd set of wrbs to notify */
1210         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1211                 wrb_fill_dummy(queue_head_node(txq));
1212                 queue_head_inc(txq);
1213                 atomic_inc(&txq->used);
1214                 txo->pend_wrb_cnt++;
1215                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1216                                            TX_HDR_WRB_NUM_SHIFT);
1217                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1218                                           TX_HDR_WRB_NUM_SHIFT);
1219         }
1220         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1221         txo->pend_wrb_cnt = 0;
1222 }
1223
1224 /* OS2BMC related */
1225
1226 #define DHCP_CLIENT_PORT        68
1227 #define DHCP_SERVER_PORT        67
1228 #define NET_BIOS_PORT1          137
1229 #define NET_BIOS_PORT2          138
1230 #define DHCPV6_RAS_PORT         547
1231
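/* The helpers below consult adapter->bmc_filt_mask to decide whether a TX
 * packet should also be forwarded to the BMC (see be_send_pkt_to_bmc()).
 */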
1232 #define is_mc_allowed_on_bmc(adapter, eh)       \
1233         (!is_multicast_filt_enabled(adapter) && \
1234          is_multicast_ether_addr(eh->h_dest) && \
1235          !is_broadcast_ether_addr(eh->h_dest))
1236
1237 #define is_bc_allowed_on_bmc(adapter, eh)       \
1238         (!is_broadcast_filt_enabled(adapter) && \
1239          is_broadcast_ether_addr(eh->h_dest))
1240
1241 #define is_arp_allowed_on_bmc(adapter, skb)     \
1242         (is_arp(skb) && is_arp_filt_enabled(adapter))
1243
1244 #define is_broadcast_packet(eh, adapter)        \
1245                 (is_multicast_ether_addr(eh->h_dest) && \
1246                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1247
1248 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1249
1250 #define is_arp_filt_enabled(adapter)    \
1251                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1252
1253 #define is_dhcp_client_filt_enabled(adapter)    \
1254                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1255
1256 #define is_dhcp_srvr_filt_enabled(adapter)      \
1257                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1258
1259 #define is_nbios_filt_enabled(adapter)  \
1260                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1261
1262 #define is_ipv6_na_filt_enabled(adapter)        \
1263                 (adapter->bmc_filt_mask &       \
1264                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1265
1266 #define is_ipv6_ra_filt_enabled(adapter)        \
1267                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1268
1269 #define is_ipv6_ras_filt_enabled(adapter)       \
1270                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1271
1272 #define is_broadcast_filt_enabled(adapter)      \
1273                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1274
1275 #define is_multicast_filt_enabled(adapter)      \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1277
1278 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1279                                struct sk_buff **skb)
1280 {
1281         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1282         bool os2bmc = false;
1283
1284         if (!be_is_os2bmc_enabled(adapter))
1285                 goto done;
1286
1287         if (!is_multicast_ether_addr(eh->h_dest))
1288                 goto done;
1289
1290         if (is_mc_allowed_on_bmc(adapter, eh) ||
1291             is_bc_allowed_on_bmc(adapter, eh) ||
1292             is_arp_allowed_on_bmc(adapter, (*skb))) {
1293                 os2bmc = true;
1294                 goto done;
1295         }
1296
1297         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1298                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1299                 u8 nexthdr = hdr->nexthdr;
1300
1301                 if (nexthdr == IPPROTO_ICMPV6) {
1302                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1303
1304                         switch (icmp6->icmp6_type) {
1305                         case NDISC_ROUTER_ADVERTISEMENT:
1306                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1307                                 goto done;
1308                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1309                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1310                                 goto done;
1311                         default:
1312                                 break;
1313                         }
1314                 }
1315         }
1316
1317         if (is_udp_pkt((*skb))) {
1318                 struct udphdr *udp = udp_hdr((*skb));
1319
1320                 switch (ntohs(udp->dest)) {
1321                 case DHCP_CLIENT_PORT:
1322                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1323                         goto done;
1324                 case DHCP_SERVER_PORT:
1325                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1326                         goto done;
1327                 case NET_BIOS_PORT1:
1328                 case NET_BIOS_PORT2:
1329                         os2bmc = is_nbios_filt_enabled(adapter);
1330                         goto done;
1331                 case DHCPV6_RAS_PORT:
1332                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1333                         goto done;
1334                 default:
1335                         break;
1336                 }
1337         }
1338 done:
1339         /* For VLAN packets destined to the BMC, the ASIC expects
1340          * the VLAN tag to be inline in the packet.
1341          */
1342         if (os2bmc)
1343                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1344
1345         return os2bmc;
1346 }
1347
1348 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1349 {
1350         struct be_adapter *adapter = netdev_priv(netdev);
1351         u16 q_idx = skb_get_queue_mapping(skb);
1352         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1353         struct be_wrb_params wrb_params = { 0 };
1354         bool flush = !skb->xmit_more;
1355         u16 wrb_cnt;
1356
1357         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1358         if (unlikely(!skb))
1359                 goto drop;
1360
1361         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1362
1363         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1364         if (unlikely(!wrb_cnt)) {
1365                 dev_kfree_skb_any(skb);
1366                 goto drop;
1367         }
1368
1369         /* if os2bmc is enabled and if the pkt is destined to bmc,
1370          * enqueue the pkt a 2nd time with mgmt bit set.
1371          */
1372         if (be_send_pkt_to_bmc(adapter, &skb)) {
1373                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1374                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1375                 if (unlikely(!wrb_cnt))
1376                         goto drop;
1377                 else
1378                         skb_get(skb);
1379         }
1380
1381         if (be_is_txq_full(txo)) {
1382                 netif_stop_subqueue(netdev, q_idx);
1383                 tx_stats(txo)->tx_stops++;
1384         }
1385
1386         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1387                 be_xmit_flush(adapter, txo);
1388
1389         return NETDEV_TX_OK;
1390 drop:
1391         tx_stats(txo)->tx_drv_drops++;
1392         /* Flush the already enqueued tx requests */
1393         if (flush && txo->pend_wrb_cnt)
1394                 be_xmit_flush(adapter, txo);
1395
1396         return NETDEV_TX_OK;
1397 }
1398
1399 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1400 {
1401         struct be_adapter *adapter = netdev_priv(netdev);
1402         struct device *dev = &adapter->pdev->dev;
1403
1404         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1405                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1406                          BE_MIN_MTU, BE_MAX_MTU);
1407                 return -EINVAL;
1408         }
1409
1410         dev_info(dev, "MTU changed from %d to %d bytes\n",
1411                  netdev->mtu, new_mtu);
1412         netdev->mtu = new_mtu;
1413         return 0;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
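/* .ndo_vlan_rx_add_vid handler: track the VID in adapter->vids and
 * re-program the HW VLAN filter table via be_vid_config().
 */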
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1581  * We use a single callback for both sync and unsync. Addresses are not actually
1582  * added/removed through this callback; it is only used to detect changes to the
1583  * uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
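/* Sync the netdev multicast list into adapter->mc_list and program either
 * the HW multicast filter or multicast-promiscuous mode, depending on
 * whether the configured addresses fit within be_max_mc().
 */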
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if num configured exceeds
1618                  * what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
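/* Add the unicast MAC at uc_idx in adapter->uc_list to the HW. If it equals
 * the primary MAC (dev_mac), reuse the primary pmac_id instead of consuming
 * another HW MAC-filter entry.
 */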
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1666                              adapter->dev_mac)) {
1667                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668                 return 0;
1669         }
1670
1671         return be_cmd_pmac_add(adapter,
1672                                (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1673                                adapter->if_handle,
1674                                &adapter->pmac_id[uc_idx + 1], 0);
1675 }
1676
1677 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1678 {
1679         if (pmac_id == adapter->pmac_id[0])
1680                 return;
1681
1682         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1683 }
1684
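/* Sync the netdev unicast list into the adapter and re-program the HW
 * unicast MAC entries, falling back to unicast-promiscuous mode when more
 * addresses are configured than the interface supports.
 */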
1685 static void be_set_uc_list(struct be_adapter *adapter)
1686 {
1687         struct net_device *netdev = adapter->netdev;
1688         struct netdev_hw_addr *ha;
1689         bool uc_promisc = false;
1690         int curr_uc_macs = 0, i;
1691
1692         netif_addr_lock_bh(netdev);
1693         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1694
1695         if (netdev->flags & IFF_PROMISC) {
1696                 adapter->update_uc_list = false;
1697         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1698                 uc_promisc = true;
1699                 adapter->update_uc_list = false;
1700         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1701                 /* Update uc-list unconditionally if the iface was previously
1702                  * in uc-promisc mode and now is out of that mode.
1703                  */
1704                 adapter->update_uc_list = true;
1705         }
1706
1707         if (adapter->update_uc_list) {
1708                 i = 1; /* First slot is claimed by the Primary MAC */
1709
1710                 /* cache the uc-list in adapter array */
1711                 netdev_for_each_uc_addr(ha, netdev) {
1712                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1713                         i++;
1714                 }
1715                 curr_uc_macs = netdev_uc_count(netdev);
1716         }
1717         netif_addr_unlock_bh(netdev);
1718
1719         if (uc_promisc) {
1720                 be_set_uc_promisc(adapter);
1721         } else if (adapter->update_uc_list) {
1722                 be_clear_uc_promisc(adapter);
1723
1724                 for (i = 0; i < adapter->uc_macs; i++)
1725                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1726
1727                 for (i = 0; i < curr_uc_macs; i++)
1728                         be_uc_mac_add(adapter, i);
1729                 adapter->uc_macs = curr_uc_macs;
1730                 adapter->update_uc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_uc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737         int i;
1738
1739         __dev_uc_unsync(netdev, NULL);
1740         for (i = 0; i < adapter->uc_macs; i++)
1741                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1742
1743         adapter->uc_macs = 0;
1744 }
1745
1746 static void __be_set_rx_mode(struct be_adapter *adapter)
1747 {
1748         struct net_device *netdev = adapter->netdev;
1749
1750         mutex_lock(&adapter->rx_filter_lock);
1751
1752         if (netdev->flags & IFF_PROMISC) {
1753                 if (!be_in_all_promisc(adapter))
1754                         be_set_all_promisc(adapter);
1755         } else if (be_in_all_promisc(adapter)) {
1756                 /* We need to re-program the vlan-list or clear
1757                  * vlan-promisc mode (if needed) when the interface
1758                  * comes out of promisc mode.
1759                  */
1760                 be_vid_config(adapter);
1761         }
1762
1763         be_set_uc_list(adapter);
1764         be_set_mc_list(adapter);
1765
1766         mutex_unlock(&adapter->rx_filter_lock);
1767 }
1768
1769 static void be_work_set_rx_mode(struct work_struct *work)
1770 {
1771         struct be_cmd_work *cmd_work =
1772                                 container_of(work, struct be_cmd_work, work);
1773
1774         __be_set_rx_mode(cmd_work->adapter);
1775         kfree(cmd_work);
1776 }
1777
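/* .ndo_set_vf_mac handler: program the given MAC on the VF's interface.
 * BEx chips need a pmac delete/add cycle; other chips use be_cmd_set_mac().
 */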
1778 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1779 {
1780         struct be_adapter *adapter = netdev_priv(netdev);
1781         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1782         int status;
1783
1784         if (!sriov_enabled(adapter))
1785                 return -EPERM;
1786
1787         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1788                 return -EINVAL;
1789
1790         /* Proceed further only if the user-provided MAC is different
1791          * from the active MAC
1792          */
1793         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1794                 return 0;
1795
1796         if (BEx_chip(adapter)) {
1797                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1798                                 vf + 1);
1799
1800                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1801                                          &vf_cfg->pmac_id, vf + 1);
1802         } else {
1803                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1804                                         vf + 1);
1805         }
1806
1807         if (status) {
1808                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1809                         mac, vf, status);
1810                 return be_cmd_status(status);
1811         }
1812
1813         ether_addr_copy(vf_cfg->mac_addr, mac);
1814
1815         return 0;
1816 }
1817
1818 static int be_get_vf_config(struct net_device *netdev, int vf,
1819                             struct ifla_vf_info *vi)
1820 {
1821         struct be_adapter *adapter = netdev_priv(netdev);
1822         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1823
1824         if (!sriov_enabled(adapter))
1825                 return -EPERM;
1826
1827         if (vf >= adapter->num_vfs)
1828                 return -EINVAL;
1829
1830         vi->vf = vf;
1831         vi->max_tx_rate = vf_cfg->tx_rate;
1832         vi->min_tx_rate = 0;
1833         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1834         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1835         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1836         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1837         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1838
1839         return 0;
1840 }
1841
1842 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1843 {
1844         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1845         u16 vids[BE_NUM_VLANS_SUPPORTED];
1846         int vf_if_id = vf_cfg->if_handle;
1847         int status;
1848
1849         /* Enable Transparent VLAN Tagging */
1850         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1851         if (status)
1852                 return status;
1853
1854         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1855         vids[0] = 0;
1856         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1857         if (!status)
1858                 dev_info(&adapter->pdev->dev,
1859                          "Cleared guest VLANs on VF%d", vf);
1860
1861         /* After TVT is enabled, disallow VFs to program VLAN filters */
1862         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1863                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1864                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1865                 if (!status)
1866                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1867         }
1868         return 0;
1869 }
1870
1871 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1872 {
1873         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1874         struct device *dev = &adapter->pdev->dev;
1875         int status;
1876
1877         /* Reset Transparent VLAN Tagging. */
1878         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1879                                        vf_cfg->if_handle, 0, 0);
1880         if (status)
1881                 return status;
1882
1883         /* Allow VFs to program VLAN filtering */
1884         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1885                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1886                                                   BE_PRIV_FILTMGMT, vf + 1);
1887                 if (!status) {
1888                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1889                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1890                 }
1891         }
1892
1893         dev_info(dev,
1894                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1895         return 0;
1896 }
1897
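/* .ndo_set_vf_vlan handler: a non-zero vlan/qos enables Transparent VLAN
 * Tagging (TVT) on the VF; zero values disable it.
 */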
1898 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1899 {
1900         struct be_adapter *adapter = netdev_priv(netdev);
1901         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1902         int status;
1903
1904         if (!sriov_enabled(adapter))
1905                 return -EPERM;
1906
1907         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1908                 return -EINVAL;
1909
1910         if (vlan || qos) {
1911                 vlan |= qos << VLAN_PRIO_SHIFT;
1912                 status = be_set_vf_tvt(adapter, vf, vlan);
1913         } else {
1914                 status = be_clear_vf_tvt(adapter, vf);
1915         }
1916
1917         if (status) {
1918                 dev_err(&adapter->pdev->dev,
1919                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1920                         status);
1921                 return be_cmd_status(status);
1922         }
1923
1924         vf_cfg->vlan_tag = vlan;
1925         return 0;
1926 }
1927
1928 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1929                              int min_tx_rate, int max_tx_rate)
1930 {
1931         struct be_adapter *adapter = netdev_priv(netdev);
1932         struct device *dev = &adapter->pdev->dev;
1933         int percent_rate, status = 0;
1934         u16 link_speed = 0;
1935         u8 link_status;
1936
1937         if (!sriov_enabled(adapter))
1938                 return -EPERM;
1939
1940         if (vf >= adapter->num_vfs)
1941                 return -EINVAL;
1942
1943         if (min_tx_rate)
1944                 return -EINVAL;
1945
1946         if (!max_tx_rate)
1947                 goto config_qos;
1948
1949         status = be_cmd_link_status_query(adapter, &link_speed,
1950                                           &link_status, 0);
1951         if (status)
1952                 goto err;
1953
1954         if (!link_status) {
1955                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1956                 status = -ENETDOWN;
1957                 goto err;
1958         }
1959
1960         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1961                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1962                         link_speed);
1963                 status = -EINVAL;
1964                 goto err;
1965         }
1966
1967         /* On Skyhawk the QOS setting must be done only as a % value */
1968         percent_rate = link_speed / 100;
1969         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1970                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1971                         percent_rate);
1972                 status = -EINVAL;
1973                 goto err;
1974         }
1975
1976 config_qos:
1977         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1978         if (status)
1979                 goto err;
1980
1981         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1982         return 0;
1983
1984 err:
1985         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1986                 max_tx_rate, vf);
1987         return be_cmd_status(status);
1988 }
1989
1990 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1991                                 int link_state)
1992 {
1993         struct be_adapter *adapter = netdev_priv(netdev);
1994         int status;
1995
1996         if (!sriov_enabled(adapter))
1997                 return -EPERM;
1998
1999         if (vf >= adapter->num_vfs)
2000                 return -EINVAL;
2001
2002         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2003         if (status) {
2004                 dev_err(&adapter->pdev->dev,
2005                         "Link state change on VF %d failed: %#x\n", vf, status);
2006                 return be_cmd_status(status);
2007         }
2008
2009         adapter->vf_cfg[vf].plink_tracking = link_state;
2010
2011         return 0;
2012 }
2013
2014 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2015 {
2016         struct be_adapter *adapter = netdev_priv(netdev);
2017         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2018         u8 spoofchk;
2019         int status;
2020
2021         if (!sriov_enabled(adapter))
2022                 return -EPERM;
2023
2024         if (vf >= adapter->num_vfs)
2025                 return -EINVAL;
2026
2027         if (BEx_chip(adapter))
2028                 return -EOPNOTSUPP;
2029
2030         if (enable == vf_cfg->spoofchk)
2031                 return 0;
2032
2033         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2034
2035         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2036                                        0, spoofchk);
2037         if (status) {
2038                 dev_err(&adapter->pdev->dev,
2039                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2040                 return be_cmd_status(status);
2041         }
2042
2043         vf_cfg->spoofchk = enable;
2044         return 0;
2045 }
2046
2047 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2048                           ulong now)
2049 {
2050         aic->rx_pkts_prev = rx_pkts;
2051         aic->tx_reqs_prev = tx_pkts;
2052         aic->jiffies = now;
2053 }
2054
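/* Compute a new adaptive interrupt-coalescing delay for this EQ from the
 * combined RX+TX packet rate since the last sample. Illustrative arithmetic
 * from the formula below: at 300,000 pkts/s, eqd = (300000 / 15000) << 2 = 80,
 * which is then clamped to the [min_eqd, max_eqd] range.
 */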
2055 static int be_get_new_eqd(struct be_eq_obj *eqo)
2056 {
2057         struct be_adapter *adapter = eqo->adapter;
2058         int eqd, start;
2059         struct be_aic_obj *aic;
2060         struct be_rx_obj *rxo;
2061         struct be_tx_obj *txo;
2062         u64 rx_pkts = 0, tx_pkts = 0;
2063         ulong now;
2064         u32 pps, delta;
2065         int i;
2066
2067         aic = &adapter->aic_obj[eqo->idx];
2068         if (!aic->enable) {
2069                 if (aic->jiffies)
2070                         aic->jiffies = 0;
2071                 eqd = aic->et_eqd;
2072                 return eqd;
2073         }
2074
2075         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2076                 do {
2077                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2078                         rx_pkts += rxo->stats.rx_pkts;
2079                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2080         }
2081
2082         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2083                 do {
2084                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2085                         tx_pkts += txo->stats.tx_reqs;
2086                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2087         }
2088
2089         /* Skip if this is the first calculation or the counters wrapped around */
2090         now = jiffies;
2091         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2092             rx_pkts < aic->rx_pkts_prev ||
2093             tx_pkts < aic->tx_reqs_prev) {
2094                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2095                 return aic->prev_eqd;
2096         }
2097
2098         delta = jiffies_to_msecs(now - aic->jiffies);
2099         if (delta == 0)
2100                 return aic->prev_eqd;
2101
2102         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2103                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2104         eqd = (pps / 15000) << 2;
2105
2106         if (eqd < 8)
2107                 eqd = 0;
2108         eqd = min_t(u32, eqd, aic->max_eqd);
2109         eqd = max_t(u32, eqd, aic->min_eqd);
2110
2111         be_aic_update(aic, rx_pkts, tx_pkts, now);
2112
2113         return eqd;
2114 }
2115
2116 /* For Skyhawk-R only */
2117 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2118 {
2119         struct be_adapter *adapter = eqo->adapter;
2120         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2121         ulong now = jiffies;
2122         int eqd;
2123         u32 mult_enc;
2124
2125         if (!aic->enable)
2126                 return 0;
2127
2128         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2129                 eqd = aic->prev_eqd;
2130         else
2131                 eqd = be_get_new_eqd(eqo);
2132
2133         if (eqd > 100)
2134                 mult_enc = R2I_DLY_ENC_1;
2135         else if (eqd > 60)
2136                 mult_enc = R2I_DLY_ENC_2;
2137         else if (eqd > 20)
2138                 mult_enc = R2I_DLY_ENC_3;
2139         else
2140                 mult_enc = R2I_DLY_ENC_0;
2141
2142         aic->prev_eqd = eqd;
2143
2144         return mult_enc;
2145 }
2146
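/* Re-evaluate the delay for every event queue and issue a single
 * be_cmd_modify_eqd() call covering the queues whose delay actually changed
 * (or all of them when force_update is set).
 */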
2147 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2148 {
2149         struct be_set_eqd set_eqd[MAX_EVT_QS];
2150         struct be_aic_obj *aic;
2151         struct be_eq_obj *eqo;
2152         int i, num = 0, eqd;
2153
2154         for_all_evt_queues(adapter, eqo, i) {
2155                 aic = &adapter->aic_obj[eqo->idx];
2156                 eqd = be_get_new_eqd(eqo);
2157                 if (force_update || eqd != aic->prev_eqd) {
2158                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2159                         set_eqd[num].eq_id = eqo->q.id;
2160                         aic->prev_eqd = eqd;
2161                         num++;
2162                 }
2163         }
2164
2165         if (num)
2166                 be_cmd_modify_eqd(adapter, set_eqd, num);
2167 }
2168
2169 static void be_rx_stats_update(struct be_rx_obj *rxo,
2170                                struct be_rx_compl_info *rxcp)
2171 {
2172         struct be_rx_stats *stats = rx_stats(rxo);
2173
2174         u64_stats_update_begin(&stats->sync);
2175         stats->rx_compl++;
2176         stats->rx_bytes += rxcp->pkt_size;
2177         stats->rx_pkts++;
2178         if (rxcp->tunneled)
2179                 stats->rx_vxlan_offload_pkts++;
2180         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2181                 stats->rx_mcast_pkts++;
2182         if (rxcp->err)
2183                 stats->rx_compl_err++;
2184         u64_stats_update_end(&stats->sync);
2185 }
2186
2187 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2188 {
2189         /* L4 checksum is not reliable for non-TCP/UDP packets.
2190          * Also ignore ipcksm for IPv6 packets.
2191          */
2192         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2193                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2194 }
2195
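/* Fetch the page_info for the fragment at the RX queue tail, unmapping the
 * whole page on its last fragment (or syncing just this fragment otherwise),
 * and advance the tail.
 */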
2196 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2197 {
2198         struct be_adapter *adapter = rxo->adapter;
2199         struct be_rx_page_info *rx_page_info;
2200         struct be_queue_info *rxq = &rxo->q;
2201         u32 frag_idx = rxq->tail;
2202
2203         rx_page_info = &rxo->page_info_tbl[frag_idx];
2204         BUG_ON(!rx_page_info->page);
2205
2206         if (rx_page_info->last_frag) {
2207                 dma_unmap_page(&adapter->pdev->dev,
2208                                dma_unmap_addr(rx_page_info, bus),
2209                                adapter->big_page_size, DMA_FROM_DEVICE);
2210                 rx_page_info->last_frag = false;
2211         } else {
2212                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2213                                         dma_unmap_addr(rx_page_info, bus),
2214                                         rx_frag_size, DMA_FROM_DEVICE);
2215         }
2216
2217         queue_tail_inc(rxq);
2218         atomic_dec(&rxq->used);
2219         return rx_page_info;
2220 }
2221
2222 /* Throw away the data in the Rx completion */
2223 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2224                                 struct be_rx_compl_info *rxcp)
2225 {
2226         struct be_rx_page_info *page_info;
2227         u16 i, num_rcvd = rxcp->num_rcvd;
2228
2229         for (i = 0; i < num_rcvd; i++) {
2230                 page_info = get_rx_page_info(rxo);
2231                 put_page(page_info->page);
2232                 memset(page_info, 0, sizeof(*page_info));
2233         }
2234 }
2235
2236 /*
2237  * skb_fill_rx_data forms a complete skb for an ether frame
2238  * indicated by rxcp.
2239  */
2240 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2241                              struct be_rx_compl_info *rxcp)
2242 {
2243         struct be_rx_page_info *page_info;
2244         u16 i, j;
2245         u16 hdr_len, curr_frag_len, remaining;
2246         u8 *start;
2247
2248         page_info = get_rx_page_info(rxo);
2249         start = page_address(page_info->page) + page_info->page_offset;
2250         prefetch(start);
2251
2252         /* Copy data in the first descriptor of this completion */
2253         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2254
2255         skb->len = curr_frag_len;
2256         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2257                 memcpy(skb->data, start, curr_frag_len);
2258                 /* Complete packet has now been moved to data */
2259                 put_page(page_info->page);
2260                 skb->data_len = 0;
2261                 skb->tail += curr_frag_len;
2262         } else {
2263                 hdr_len = ETH_HLEN;
2264                 memcpy(skb->data, start, hdr_len);
2265                 skb_shinfo(skb)->nr_frags = 1;
2266                 skb_frag_set_page(skb, 0, page_info->page);
2267                 skb_shinfo(skb)->frags[0].page_offset =
2268                                         page_info->page_offset + hdr_len;
2269                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2270                                   curr_frag_len - hdr_len);
2271                 skb->data_len = curr_frag_len - hdr_len;
2272                 skb->truesize += rx_frag_size;
2273                 skb->tail += hdr_len;
2274         }
2275         page_info->page = NULL;
2276
2277         if (rxcp->pkt_size <= rx_frag_size) {
2278                 BUG_ON(rxcp->num_rcvd != 1);
2279                 return;
2280         }
2281
2282         /* More frags present for this completion */
2283         remaining = rxcp->pkt_size - curr_frag_len;
2284         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2285                 page_info = get_rx_page_info(rxo);
2286                 curr_frag_len = min(remaining, rx_frag_size);
2287
2288                 /* Coalesce all frags from the same physical page in one slot */
2289                 if (page_info->page_offset == 0) {
2290                         /* Fresh page */
2291                         j++;
2292                         skb_frag_set_page(skb, j, page_info->page);
2293                         skb_shinfo(skb)->frags[j].page_offset =
2294                                                         page_info->page_offset;
2295                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2296                         skb_shinfo(skb)->nr_frags++;
2297                 } else {
2298                         put_page(page_info->page);
2299                 }
2300
2301                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2302                 skb->len += curr_frag_len;
2303                 skb->data_len += curr_frag_len;
2304                 skb->truesize += rx_frag_size;
2305                 remaining -= curr_frag_len;
2306                 page_info->page = NULL;
2307         }
2308         BUG_ON(j > MAX_SKB_FRAGS);
2309 }
2310
2311 /* Process the RX completion indicated by rxcp when GRO is disabled */
2312 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2313                                 struct be_rx_compl_info *rxcp)
2314 {
2315         struct be_adapter *adapter = rxo->adapter;
2316         struct net_device *netdev = adapter->netdev;
2317         struct sk_buff *skb;
2318
2319         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2320         if (unlikely(!skb)) {
2321                 rx_stats(rxo)->rx_drops_no_skbs++;
2322                 be_rx_compl_discard(rxo, rxcp);
2323                 return;
2324         }
2325
2326         skb_fill_rx_data(rxo, skb, rxcp);
2327
2328         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2329                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2330         else
2331                 skb_checksum_none_assert(skb);
2332
2333         skb->protocol = eth_type_trans(skb, netdev);
2334         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2335         if (netdev->features & NETIF_F_RXHASH)
2336                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2337
2338         skb->csum_level = rxcp->tunneled;
2339         skb_mark_napi_id(skb, napi);
2340
2341         if (rxcp->vlanf)
2342                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2343
2344         netif_receive_skb(skb);
2345 }
2346
2347 /* Process the RX completion indicated by rxcp when GRO is enabled */
2348 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2349                                     struct napi_struct *napi,
2350                                     struct be_rx_compl_info *rxcp)
2351 {
2352         struct be_adapter *adapter = rxo->adapter;
2353         struct be_rx_page_info *page_info;
2354         struct sk_buff *skb = NULL;
2355         u16 remaining, curr_frag_len;
2356         u16 i, j;
2357
2358         skb = napi_get_frags(napi);
2359         if (!skb) {
2360                 be_rx_compl_discard(rxo, rxcp);
2361                 return;
2362         }
2363
2364         remaining = rxcp->pkt_size;
2365         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2366                 page_info = get_rx_page_info(rxo);
2367
2368                 curr_frag_len = min(remaining, rx_frag_size);
2369
2370                 /* Coalesce all frags from the same physical page in one slot */
2371                 if (i == 0 || page_info->page_offset == 0) {
2372                         /* First frag or Fresh page */
2373                         j++;
2374                         skb_frag_set_page(skb, j, page_info->page);
2375                         skb_shinfo(skb)->frags[j].page_offset =
2376                                                         page_info->page_offset;
2377                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378                 } else {
2379                         put_page(page_info->page);
2380                 }
2381                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2382                 skb->truesize += rx_frag_size;
2383                 remaining -= curr_frag_len;
2384                 memset(page_info, 0, sizeof(*page_info));
2385         }
2386         BUG_ON(j > MAX_SKB_FRAGS);
2387
2388         skb_shinfo(skb)->nr_frags = j + 1;
2389         skb->len = rxcp->pkt_size;
2390         skb->data_len = rxcp->pkt_size;
2391         skb->ip_summed = CHECKSUM_UNNECESSARY;
2392         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2393         if (adapter->netdev->features & NETIF_F_RXHASH)
2394                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2395
2396         skb->csum_level = rxcp->tunneled;
2397
2398         if (rxcp->vlanf)
2399                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2400
2401         napi_gro_frags(napi);
2402 }
2403
2404 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2405                                  struct be_rx_compl_info *rxcp)
2406 {
2407         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2408         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2409         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2410         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2411         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2412         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2413         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2414         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2415         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2416         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2417         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2418         if (rxcp->vlanf) {
2419                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2420                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2421         }
2422         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2423         rxcp->tunneled =
2424                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2425 }
2426
2427 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2428                                  struct be_rx_compl_info *rxcp)
2429 {
2430         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2431         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2432         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2433         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2434         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2435         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2436         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2437         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2438         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2439         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2440         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2441         if (rxcp->vlanf) {
2442                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2443                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2444         }
2445         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2446         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2447 }
2448
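/* Return the next valid RX completion from the CQ, parsed into rxo->rxcp
 * (v1 layout in BE3-native mode, v0 otherwise); returns NULL when the CQ
 * has no valid entry.
 */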
2449 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2450 {
2451         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2452         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2453         struct be_adapter *adapter = rxo->adapter;
2454
2455         /* For checking the valid bit it is OK to use either definition, as the
2456          * valid bit is at the same position in both v0 and v1 Rx compl */
2457         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2458                 return NULL;
2459
2460         rmb();
2461         be_dws_le_to_cpu(compl, sizeof(*compl));
2462
2463         if (adapter->be3_native)
2464                 be_parse_rx_compl_v1(compl, rxcp);
2465         else
2466                 be_parse_rx_compl_v0(compl, rxcp);
2467
2468         if (rxcp->ip_frag)
2469                 rxcp->l4_csum = 0;
2470
2471         if (rxcp->vlanf) {
2472                 /* In QNQ modes, if the qnq bit is not set, then the packet was
2473                  * tagged only with the transparent outer vlan-tag and must
2474                  * not be treated as a vlan packet by the host
2475                  */
2476                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2477                         rxcp->vlanf = 0;
2478
2479                 if (!lancer_chip(adapter))
2480                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2481
2482                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2483                     !test_bit(rxcp->vlan_tag, adapter->vids))
2484                         rxcp->vlanf = 0;
2485         }
2486
2487         /* As the compl has been parsed, reset it; we won't touch it again */
2488         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2489
2490         queue_tail_inc(&rxo->cq);
2491         return rxcp;
2492 }
2493
2494 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2495 {
2496         u32 order = get_order(size);
2497
2498         if (order > 0)
2499                 gfp |= __GFP_COMP;
2500         return  alloc_pages(gfp, order);
2501 }
2502
2503 /*
2504  * Allocate a page, split it into fragments of size rx_frag_size and post
2505  * them as receive buffers to BE
2506  */
2507 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2508 {
2509         struct be_adapter *adapter = rxo->adapter;
2510         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2511         struct be_queue_info *rxq = &rxo->q;
2512         struct page *pagep = NULL;
2513         struct device *dev = &adapter->pdev->dev;
2514         struct be_eth_rx_d *rxd;
2515         u64 page_dmaaddr = 0, frag_dmaaddr;
2516         u32 posted, page_offset = 0, notify = 0;
2517
2518         page_info = &rxo->page_info_tbl[rxq->head];
2519         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2520                 if (!pagep) {
2521                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2522                         if (unlikely(!pagep)) {
2523                                 rx_stats(rxo)->rx_post_fail++;
2524                                 break;
2525                         }
2526                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2527                                                     adapter->big_page_size,
2528                                                     DMA_FROM_DEVICE);
2529                         if (dma_mapping_error(dev, page_dmaaddr)) {
2530                                 put_page(pagep);
2531                                 pagep = NULL;
2532                                 adapter->drv_stats.dma_map_errors++;
2533                                 break;
2534                         }
2535                         page_offset = 0;
2536                 } else {
2537                         get_page(pagep);
2538                         page_offset += rx_frag_size;
2539                 }
2540                 page_info->page_offset = page_offset;
2541                 page_info->page = pagep;
2542
2543                 rxd = queue_head_node(rxq);
2544                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2545                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2546                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2547
2548                 /* Any space left in the current big page for another frag? */
2549                 if ((page_offset + rx_frag_size + rx_frag_size) >
2550                                         adapter->big_page_size) {
2551                         pagep = NULL;
2552                         page_info->last_frag = true;
2553                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2554                 } else {
2555                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2556                 }
2557
2558                 prev_page_info = page_info;
2559                 queue_head_inc(rxq);
2560                 page_info = &rxo->page_info_tbl[rxq->head];
2561         }
2562
2563         /* Mark the last frag of a page when we break out of the above loop
2564          * with no more slots available in the RXQ
2565          */
2566         if (pagep) {
2567                 prev_page_info->last_frag = true;
2568                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2569         }
2570
2571         if (posted) {
2572                 atomic_add(posted, &rxq->used);
2573                 if (rxo->rx_post_starved)
2574                         rxo->rx_post_starved = false;
2575                 do {
2576                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2577                         be_rxq_notify(adapter, rxq->id, notify);
2578                         posted -= notify;
2579                 } while (posted);
2580         } else if (atomic_read(&rxq->used) == 0) {
2581                 /* Let be_worker replenish when memory is available */
2582                 rxo->rx_post_starved = true;
2583         }
2584 }
2585
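/* Return the next valid TX completion from the CQ, or NULL if none is pending */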
2586 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2587 {
2588         struct be_queue_info *tx_cq = &txo->cq;
2589         struct be_tx_compl_info *txcp = &txo->txcp;
2590         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2591
2592         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2593                 return NULL;
2594
2595         /* Ensure load ordering of valid bit dword and other dwords below */
2596         rmb();
2597         be_dws_le_to_cpu(compl, sizeof(*compl));
2598
2599         txcp->status = GET_TX_COMPL_BITS(status, compl);
2600         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2601
2602         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2603         queue_tail_inc(tx_cq);
2604         return txcp;
2605 }
2606
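/* Walk the TX queue from its tail up to last_index, unmapping the WRBs and
 * freeing the corresponding skbs. Returns the number of WRBs processed.
 */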
2607 static u16 be_tx_compl_process(struct be_adapter *adapter,
2608                                struct be_tx_obj *txo, u16 last_index)
2609 {
2610         struct sk_buff **sent_skbs = txo->sent_skb_list;
2611         struct be_queue_info *txq = &txo->q;
2612         struct sk_buff *skb = NULL;
2613         bool unmap_skb_hdr = false;
2614         struct be_eth_wrb *wrb;
2615         u16 num_wrbs = 0;
2616         u32 frag_index;
2617
2618         do {
2619                 if (sent_skbs[txq->tail]) {
2620                         /* Free skb from prev req */
2621                         if (skb)
2622                                 dev_consume_skb_any(skb);
2623                         skb = sent_skbs[txq->tail];
2624                         sent_skbs[txq->tail] = NULL;
2625                         queue_tail_inc(txq);  /* skip hdr wrb */
2626                         num_wrbs++;
2627                         unmap_skb_hdr = true;
2628                 }
2629                 wrb = queue_tail_node(txq);
2630                 frag_index = txq->tail;
2631                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2632                               (unmap_skb_hdr && skb_headlen(skb)));
2633                 unmap_skb_hdr = false;
2634                 queue_tail_inc(txq);
2635                 num_wrbs++;
2636         } while (frag_index != last_index);
2637         dev_consume_skb_any(skb);
2638
2639         return num_wrbs;
2640 }
2641
2642 /* Return the number of events in the event queue */
2643 static inline int events_get(struct be_eq_obj *eqo)
2644 {
2645         struct be_eq_entry *eqe;
2646         int num = 0;
2647
2648         do {
2649                 eqe = queue_tail_node(&eqo->q);
2650                 if (eqe->evt == 0)
2651                         break;
2652
2653                 rmb();
2654                 eqe->evt = 0;
2655                 num++;
2656                 queue_tail_inc(&eqo->q);
2657         } while (true);
2658
2659         return num;
2660 }
2661
2662 /* Leaves the EQ in disarmed state */
2663 static void be_eq_clean(struct be_eq_obj *eqo)
2664 {
2665         int num = events_get(eqo);
2666
2667         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2668 }
2669
2670 /* Free posted rx buffers that were not used */
2671 static void be_rxq_clean(struct be_rx_obj *rxo)
2672 {
2673         struct be_queue_info *rxq = &rxo->q;
2674         struct be_rx_page_info *page_info;
2675
2676         while (atomic_read(&rxq->used) > 0) {
2677                 page_info = get_rx_page_info(rxo);
2678                 put_page(page_info->page);
2679                 memset(page_info, 0, sizeof(*page_info));
2680         }
2681         BUG_ON(atomic_read(&rxq->used));
2682         rxq->tail = 0;
2683         rxq->head = 0;
2684 }
2685
2686 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2687 {
2688         struct be_queue_info *rx_cq = &rxo->cq;
2689         struct be_rx_compl_info *rxcp;
2690         struct be_adapter *adapter = rxo->adapter;
2691         int flush_wait = 0;
2692
2693         /* Consume pending rx completions.
2694          * Wait for the flush completion (identified by zero num_rcvd)
2695          * to arrive. Notify CQ even when there are no more CQ entries
2696          * for HW to flush partially coalesced CQ entries.
2697          * In Lancer, there is no need to wait for flush compl.
2698          */
2699         for (;;) {
2700                 rxcp = be_rx_compl_get(rxo);
2701                 if (!rxcp) {
2702                         if (lancer_chip(adapter))
2703                                 break;
2704
2705                         if (flush_wait++ > 50 ||
2706                             be_check_error(adapter,
2707                                            BE_ERROR_HW)) {
2708                                 dev_warn(&adapter->pdev->dev,
2709                                          "did not receive flush compl\n");
2710                                 break;
2711                         }
2712                         be_cq_notify(adapter, rx_cq->id, true, 0);
2713                         mdelay(1);
2714                 } else {
2715                         be_rx_compl_discard(rxo, rxcp);
2716                         be_cq_notify(adapter, rx_cq->id, false, 1);
2717                         if (rxcp->num_rcvd == 0)
2718                                 break;
2719                 }
2720         }
2721
2722         /* After cleanup, leave the CQ in unarmed state */
2723         be_cq_notify(adapter, rx_cq->id, false, 0);
2724 }
2725
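/* Drain pending TX completions on all queues, then free any requests that
 * were enqueued but never notified to the HW and reset those TXQ indices.
 */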
2726 static void be_tx_compl_clean(struct be_adapter *adapter)
2727 {
2728         struct device *dev = &adapter->pdev->dev;
2729         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2730         struct be_tx_compl_info *txcp;
2731         struct be_queue_info *txq;
2732         u32 end_idx, notified_idx;
2733         struct be_tx_obj *txo;
2734         int i, pending_txqs;
2735
2736         /* Stop polling for compls when HW has been silent for 10ms */
2737         do {
2738                 pending_txqs = adapter->num_tx_qs;
2739
2740                 for_all_tx_queues(adapter, txo, i) {
2741                         cmpl = 0;
2742                         num_wrbs = 0;
2743                         txq = &txo->q;
2744                         while ((txcp = be_tx_compl_get(txo))) {
2745                                 num_wrbs +=
2746                                         be_tx_compl_process(adapter, txo,
2747                                                             txcp->end_index);
2748                                 cmpl++;
2749                         }
2750                         if (cmpl) {
2751                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2752                                 atomic_sub(num_wrbs, &txq->used);
2753                                 timeo = 0;
2754                         }
2755                         if (!be_is_tx_compl_pending(txo))
2756                                 pending_txqs--;
2757                 }
2758
2759                 if (pending_txqs == 0 || ++timeo > 10 ||
2760                     be_check_error(adapter, BE_ERROR_HW))
2761                         break;
2762
2763                 mdelay(1);
2764         } while (true);
2765
2766         /* Free enqueued TX that was never notified to HW */
2767         for_all_tx_queues(adapter, txo, i) {
2768                 txq = &txo->q;
2769
2770                 if (atomic_read(&txq->used)) {
2771                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2772                                  i, atomic_read(&txq->used));
2773                         notified_idx = txq->tail;
2774                         end_idx = txq->tail;
2775                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2776                                   txq->len);
2777                         /* Use the tx-compl process logic to handle requests
2778                          * that were not sent to the HW.
2779                          */
2780                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2781                         atomic_sub(num_wrbs, &txq->used);
2782                         BUG_ON(atomic_read(&txq->used));
2783                         txo->pend_wrb_cnt = 0;
2784                         /* Since hw was never notified of these requests,
2785                          * reset TXQ indices
2786                          */
2787                         txq->head = notified_idx;
2788                         txq->tail = notified_idx;
2789                 }
2790         }
2791 }
2792
2793 static void be_evt_queues_destroy(struct be_adapter *adapter)
2794 {
2795         struct be_eq_obj *eqo;
2796         int i;
2797
2798         for_all_evt_queues(adapter, eqo, i) {
2799                 if (eqo->q.created) {
2800                         be_eq_clean(eqo);
2801                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2802                         napi_hash_del(&eqo->napi);
2803                         netif_napi_del(&eqo->napi);
2804                         free_cpumask_var(eqo->affinity_mask);
2805                 }
2806                 be_queue_free(adapter, &eqo->q);
2807         }
2808 }
2809
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812         struct be_queue_info *eq;
2813         struct be_eq_obj *eqo;
2814         struct be_aic_obj *aic;
2815         int i, rc;
2816
2817         /* need enough EQs to service both RX and TX queues */
2818         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819                                     max(adapter->cfg_num_rx_irqs,
2820                                         adapter->cfg_num_tx_irqs));
2821
2822         for_all_evt_queues(adapter, eqo, i) {
2823                 int numa_node = dev_to_node(&adapter->pdev->dev);
2824
2825                 aic = &adapter->aic_obj[i];
2826                 eqo->adapter = adapter;
2827                 eqo->idx = i;
2828                 aic->max_eqd = BE_MAX_EQD;
2829                 aic->enable = true;
2830
2831                 eq = &eqo->q;
2832                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833                                     sizeof(struct be_eq_entry));
2834                 if (rc)
2835                         return rc;
2836
2837                 rc = be_cmd_eq_create(adapter, eqo);
2838                 if (rc)
2839                         return rc;
2840
2841                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842                         return -ENOMEM;
2843                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844                                 eqo->affinity_mask);
2845                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846                                BE_NAPI_WEIGHT);
2847         }
2848         return 0;
2849 }
2850
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853         struct be_queue_info *q;
2854
2855         q = &adapter->mcc_obj.q;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858         be_queue_free(adapter, q);
2859
2860         q = &adapter->mcc_obj.cq;
2861         if (q->created)
2862                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863         be_queue_free(adapter, q);
2864 }
2865
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *q, *cq;
2870
2871         cq = &adapter->mcc_obj.cq;
2872         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873                            sizeof(struct be_mcc_compl)))
2874                 goto err;
2875
2876         /* Use the default EQ for MCC completions */
2877         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878                 goto mcc_cq_free;
2879
2880         q = &adapter->mcc_obj.q;
2881         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882                 goto mcc_cq_destroy;
2883
2884         if (be_cmd_mccq_create(adapter, q, cq))
2885                 goto mcc_q_free;
2886
2887         return 0;
2888
2889 mcc_q_free:
2890         be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894         be_queue_free(adapter, cq);
2895 err:
2896         return -1;
2897 }
2898
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901         struct be_queue_info *q;
2902         struct be_tx_obj *txo;
2903         u8 i;
2904
2905         for_all_tx_queues(adapter, txo, i) {
2906                 q = &txo->q;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909                 be_queue_free(adapter, q);
2910
2911                 q = &txo->cq;
2912                 if (q->created)
2913                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914                 be_queue_free(adapter, q);
2915         }
2916 }
2917
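/* Create the TX queues: one completion queue and one WRB queue per TXQ.
 * When there are fewer EQs than TXQs, TX CQs are bound to the EQs
 * round-robin, and each TXQ's XPS map follows its EQ's CPU affinity mask.
 */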
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920         struct be_queue_info *cq;
2921         struct be_tx_obj *txo;
2922         struct be_eq_obj *eqo;
2923         int status, i;
2924
2925         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926
2927         for_all_tx_queues(adapter, txo, i) {
2928                 cq = &txo->cq;
2929                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930                                         sizeof(struct be_eth_tx_compl));
2931                 if (status)
2932                         return status;
2933
2934                 u64_stats_init(&txo->stats.sync);
2935                 u64_stats_init(&txo->stats.sync_compl);
2936
2937                 /* If num_evt_qs is less than num_tx_qs, then more than
2938                  * one TXQ shares an EQ
2939                  */
2940                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942                 if (status)
2943                         return status;
2944
2945                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946                                         sizeof(struct be_eth_wrb));
2947                 if (status)
2948                         return status;
2949
2950                 status = be_cmd_txq_create(adapter, txo);
2951                 if (status)
2952                         return status;
2953
2954                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955                                     eqo->idx);
2956         }
2957
2958         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959                  adapter->num_tx_qs);
2960         return 0;
2961 }
2962
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965         struct be_queue_info *q;
2966         struct be_rx_obj *rxo;
2967         int i;
2968
2969         for_all_rx_queues(adapter, rxo, i) {
2970                 q = &rxo->cq;
2971                 if (q->created)
2972                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973                 be_queue_free(adapter, q);
2974         }
2975 }
2976
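/* Size the RX side (RSS rings are used only when at least two are
 * available, plus an optional default RXQ) and create one RX completion
 * queue per RXQ, binding the CQs to the EQs round-robin.
 */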
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *eq, *cq;
2980         struct be_rx_obj *rxo;
2981         int rc, i;
2982
2983         adapter->num_rss_qs =
2984                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985
2986         /* We'll use RSS only if at least 2 RSS rings are supported. */
2987         if (adapter->num_rss_qs < 2)
2988                 adapter->num_rss_qs = 0;
2989
2990         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991
2992         /* When the interface is not capable of RSS rings (and there is no
2993          * need to create a default RXQ) we'll still need one RXQ
2994          */
2995         if (adapter->num_rx_qs == 0)
2996                 adapter->num_rx_qs = 1;
2997
2998         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999         for_all_rx_queues(adapter, rxo, i) {
3000                 rxo->adapter = adapter;
3001                 cq = &rxo->cq;
3002                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003                                     sizeof(struct be_eth_rx_compl));
3004                 if (rc)
3005                         return rc;
3006
3007                 u64_stats_init(&rxo->stats.sync);
3008                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010                 if (rc)
3011                         return rc;
3012         }
3013
3014         dev_info(&adapter->pdev->dev,
3015                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3016         return 0;
3017 }
3018
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021         struct be_eq_obj *eqo = dev;
3022         struct be_adapter *adapter = eqo->adapter;
3023         int num_evts = 0;
3024
3025         /* IRQ is not expected when NAPI is scheduled as the EQ
3026          * will not be armed.
3027          * But, this can happen on Lancer INTx where it takes
3028                  * a while to de-assert INTx or in BE2 where occasionally
3029          * an interrupt may be raised even when EQ is unarmed.
3030          * If NAPI is already scheduled, then counting & notifying
3031          * events will orphan them.
3032          */
3033         if (napi_schedule_prep(&eqo->napi)) {
3034                 num_evts = events_get(eqo);
3035                 __napi_schedule(&eqo->napi);
3036                 if (num_evts)
3037                         eqo->spurious_intr = 0;
3038         }
3039         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040
3041         /* Return IRQ_HANDLED only for the first spurious intr
3042          * after a valid intr to stop the kernel from branding
3043          * this irq as a bad one!
3044          */
3045         if (num_evts || eqo->spurious_intr++ == 0)
3046                 return IRQ_HANDLED;
3047         else
3048                 return IRQ_NONE;
3049 }
3050
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053         struct be_eq_obj *eqo = dev;
3054
3055         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056         napi_schedule(&eqo->napi);
3057         return IRQ_HANDLED;
3058 }
3059
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3063 }
3064
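/* NAPI/busy-poll RX handler for one RXQ: consume up to @budget completions,
 * discard flush, partial-DMA and wrong-port completions, pass packets to GRO
 * or the regular receive path, then notify the CQ and replenish RX fragments
 * (unless the queue is post-starved, in which case be_worker refills it).
 */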
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066                          int budget, int polling)
3067 {
3068         struct be_adapter *adapter = rxo->adapter;
3069         struct be_queue_info *rx_cq = &rxo->cq;
3070         struct be_rx_compl_info *rxcp;
3071         u32 work_done;
3072         u32 frags_consumed = 0;
3073
3074         for (work_done = 0; work_done < budget; work_done++) {
3075                 rxcp = be_rx_compl_get(rxo);
3076                 if (!rxcp)
3077                         break;
3078
3079                 /* Is it a flush compl that has no data */
3080                 if (unlikely(rxcp->num_rcvd == 0))
3081                         goto loop_continue;
3082
3083                 /* Discard compl with partial DMA Lancer B0 */
3084                 if (unlikely(!rxcp->pkt_size)) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* On BE drop pkts that arrive due to imperfect filtering in
3090          * promiscuous mode on some SKUs
3091                  */
3092                 if (unlikely(rxcp->port != adapter->port_num &&
3093                              !lancer_chip(adapter))) {
3094                         be_rx_compl_discard(rxo, rxcp);
3095                         goto loop_continue;
3096                 }
3097
3098                 /* Don't do gro when we're busy_polling */
3099                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3100                         be_rx_compl_process_gro(rxo, napi, rxcp);
3101                 else
3102                         be_rx_compl_process(rxo, napi, rxcp);
3103
3104 loop_continue:
3105                 frags_consumed += rxcp->num_rcvd;
3106                 be_rx_stats_update(rxo, rxcp);
3107         }
3108
3109         if (work_done) {
3110                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3111
3112                 /* When an rx-obj gets into post_starved state, just
3113                  * let be_worker do the posting.
3114                  */
3115                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3116                     !rxo->rx_post_starved)
3117                         be_post_rx_frags(rxo, GFP_ATOMIC,
3118                                          max_t(u32, MAX_RX_POST,
3119                                                frags_consumed));
3120         }
3121
3122         return work_done;
3123 }
3124
3125 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3126 {
3127         switch (status) {
3128         case BE_TX_COMP_HDR_PARSE_ERR:
3129                 tx_stats(txo)->tx_hdr_parse_err++;
3130                 break;
3131         case BE_TX_COMP_NDMA_ERR:
3132                 tx_stats(txo)->tx_dma_err++;
3133                 break;
3134         case BE_TX_COMP_ACL_ERR:
3135                 tx_stats(txo)->tx_spoof_check_err++;
3136                 break;
3137         }
3138 }
3139
3140 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3141 {
3142         switch (status) {
3143         case LANCER_TX_COMP_LSO_ERR:
3144                 tx_stats(txo)->tx_tso_err++;
3145                 break;
3146         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3147         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3148                 tx_stats(txo)->tx_spoof_check_err++;
3149                 break;
3150         case LANCER_TX_COMP_QINQ_ERR:
3151                 tx_stats(txo)->tx_qinq_err++;
3152                 break;
3153         case LANCER_TX_COMP_PARITY_ERR:
3154                 tx_stats(txo)->tx_internal_parity_err++;
3155                 break;
3156         case LANCER_TX_COMP_DMA_ERR:
3157                 tx_stats(txo)->tx_dma_err++;
3158                 break;
3159         }
3160 }
3161
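/* Reap TX completions for one TXQ: free the completed wrbs/skbs, account
 * per-error-type stats, notify the CQ and wake the netdev sub-queue if it
 * was stopped for lack of wrbs.
 */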
3162 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3163                           int idx)
3164 {
3165         int num_wrbs = 0, work_done = 0;
3166         struct be_tx_compl_info *txcp;
3167
3168         while ((txcp = be_tx_compl_get(txo))) {
3169                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3170                 work_done++;
3171
3172                 if (txcp->status) {
3173                         if (lancer_chip(adapter))
3174                                 lancer_update_tx_err(txo, txcp->status);
3175                         else
3176                                 be_update_tx_err(txo, txcp->status);
3177                 }
3178         }
3179
3180         if (work_done) {
3181                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3182                 atomic_sub(num_wrbs, &txo->q.used);
3183
3184                 /* As Tx wrbs have been freed up, wake up the netdev queue
3185                  * if it was stopped due to lack of tx wrbs. */
3186                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3187                     be_can_txq_wake(txo)) {
3188                         netif_wake_subqueue(adapter->netdev, idx);
3189                 }
3190
3191                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3192                 tx_stats(txo)->tx_compl += work_done;
3193                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3194         }
3195 }
3196
3197 #ifdef CONFIG_NET_RX_BUSY_POLL
3198 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3199 {
3200         bool status = true;
3201
3202         spin_lock(&eqo->lock); /* BH is already disabled */
3203         if (eqo->state & BE_EQ_LOCKED) {
3204                 WARN_ON(eqo->state & BE_EQ_NAPI);
3205                 eqo->state |= BE_EQ_NAPI_YIELD;
3206                 status = false;
3207         } else {
3208                 eqo->state = BE_EQ_NAPI;
3209         }
3210         spin_unlock(&eqo->lock);
3211         return status;
3212 }
3213
3214 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3215 {
3216         spin_lock(&eqo->lock); /* BH is already disabled */
3217
3218         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3219         eqo->state = BE_EQ_IDLE;
3220
3221         spin_unlock(&eqo->lock);
3222 }
3223
3224 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3225 {
3226         bool status = true;
3227
3228         spin_lock_bh(&eqo->lock);
3229         if (eqo->state & BE_EQ_LOCKED) {
3230                 eqo->state |= BE_EQ_POLL_YIELD;
3231                 status = false;
3232         } else {
3233                 eqo->state |= BE_EQ_POLL;
3234         }
3235         spin_unlock_bh(&eqo->lock);
3236         return status;
3237 }
3238
3239 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3240 {
3241         spin_lock_bh(&eqo->lock);
3242
3243         WARN_ON(eqo->state & (BE_EQ_NAPI));
3244         eqo->state = BE_EQ_IDLE;
3245
3246         spin_unlock_bh(&eqo->lock);
3247 }
3248
3249 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3250 {
3251         spin_lock_init(&eqo->lock);
3252         eqo->state = BE_EQ_IDLE;
3253 }
3254
3255 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3256 {
3257         local_bh_disable();
3258
3259         /* It's enough to just acquire napi lock on the eqo to stop
3260          * be_busy_poll() from processing any queues.
3261          */
3262         while (!be_lock_napi(eqo))
3263                 mdelay(1);
3264
3265         local_bh_enable();
3266 }
3267
3268 #else /* CONFIG_NET_RX_BUSY_POLL */
3269
3270 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3271 {
3272         return true;
3273 }
3274
3275 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3276 {
3277 }
3278
3279 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3280 {
3281         return false;
3282 }
3283
3284 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287
3288 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3289 {
3290 }
3291
3292 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3293 {
3294 }
3295 #endif /* CONFIG_NET_RX_BUSY_POLL */
3296
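/* NAPI handler shared by the TX, RX and MCC queues of an EQ: count pending
 * EQ entries, reap TX completions, process RX (unless busy-poll owns the
 * EQ), handle MCC completions on the MCC EQ, and finally either re-arm the
 * EQ when under budget or only clear the counted events and keep polling.
 */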
3297 int be_poll(struct napi_struct *napi, int budget)
3298 {
3299         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3300         struct be_adapter *adapter = eqo->adapter;
3301         int max_work = 0, work, i, num_evts;
3302         struct be_rx_obj *rxo;
3303         struct be_tx_obj *txo;
3304         u32 mult_enc = 0;
3305
3306         num_evts = events_get(eqo);
3307
3308         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3309                 be_process_tx(adapter, txo, i);
3310
3311         if (be_lock_napi(eqo)) {
3312                 /* This loop will iterate twice for EQ0 in which
3313                  * completions of the last RXQ (default one) are also processed.
3314                  * For other EQs the loop iterates only once.
3315                  */
3316                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3317                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3318                         max_work = max(work, max_work);
3319                 }
3320                 be_unlock_napi(eqo);
3321         } else {
3322                 max_work = budget;
3323         }
3324
3325         if (is_mcc_eqo(eqo))
3326                 be_process_mcc(adapter);
3327
3328         if (max_work < budget) {
3329                 napi_complete(napi);
3330
3331                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3332                  * delay via a delay multiplier encoding value
3333                  */
3334                 if (skyhawk_chip(adapter))
3335                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3336
3337                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3338                              mult_enc);
3339         } else {
3340                 /* As we'll continue in polling mode, count and clear events */
3341                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3342         }
3343         return max_work;
3344 }
3345
3346 #ifdef CONFIG_NET_RX_BUSY_POLL
3347 static int be_busy_poll(struct napi_struct *napi)
3348 {
3349         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3350         struct be_adapter *adapter = eqo->adapter;
3351         struct be_rx_obj *rxo;
3352         int i, work = 0;
3353
3354         if (!be_lock_busy_poll(eqo))
3355                 return LL_FLUSH_BUSY;
3356
3357         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3358                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3359                 if (work)
3360                         break;
3361         }
3362
3363         be_unlock_busy_poll(eqo);
3364         return work;
3365 }
3366 #endif
3367
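/* Check for unrecoverable errors: on Lancer, read the SLIPORT status/error
 * registers and flag an error unless the signature indicates a FW reset in
 * progress; on other chips, read and unmask the UE status CSRs, log any set
 * bits, and set the error state only on Skyhawk since BE2/BE3 may report
 * spurious UEs.
 */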
3368 void be_detect_error(struct be_adapter *adapter)
3369 {
3370         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3371         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3372         u32 i;
3373         struct device *dev = &adapter->pdev->dev;
3374
3375         if (be_check_error(adapter, BE_ERROR_HW))
3376                 return;
3377
3378         if (lancer_chip(adapter)) {
3379                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3380                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3381                         be_set_error(adapter, BE_ERROR_UE);
3382                         sliport_err1 = ioread32(adapter->db +
3383                                                 SLIPORT_ERROR1_OFFSET);
3384                         sliport_err2 = ioread32(adapter->db +
3385                                                 SLIPORT_ERROR2_OFFSET);
3386                         /* Do not log error messages if it's a FW reset */
3387                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3388                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3389                                 dev_info(dev, "Firmware update in progress\n");
3390                         } else {
3391                                 dev_err(dev, "Error detected in the card\n");
3392                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3393                                         sliport_status);
3394                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3395                                         sliport_err1);
3396                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3397                                         sliport_err2);
3398                         }
3399                 }
3400         } else {
3401                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3402                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3403                 ue_lo_mask = ioread32(adapter->pcicfg +
3404                                       PCICFG_UE_STATUS_LOW_MASK);
3405                 ue_hi_mask = ioread32(adapter->pcicfg +
3406                                       PCICFG_UE_STATUS_HI_MASK);
3407
3408                 ue_lo = (ue_lo & ~ue_lo_mask);
3409                 ue_hi = (ue_hi & ~ue_hi_mask);
3410
3411                 /* On certain platforms BE hardware can indicate spurious UEs.
3412                  * Allow HW to stop working completely in case of a real UE.
3413                  * Hence not setting the hw_error for UE detection.
3414                  */
3415
3416                 if (ue_lo || ue_hi) {
3417                         dev_err(dev, "Error detected in the adapter");
3418                         if (skyhawk_chip(adapter))
3419                                 be_set_error(adapter, BE_ERROR_UE);
3420
3421                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3422                                 if (ue_lo & 1)
3423                                         dev_err(dev, "UE: %s bit set\n",
3424                                                 ue_status_low_desc[i]);
3425                         }
3426                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3427                                 if (ue_hi & 1)
3428                                         dev_err(dev, "UE: %s bit set\n",
3429                                                 ue_status_hi_desc[i]);
3430                         }
3431                 }
3432         }
3433 }
3434
3435 static void be_msix_disable(struct be_adapter *adapter)
3436 {
3437         if (msix_enabled(adapter)) {
3438                 pci_disable_msix(adapter->pdev);
3439                 adapter->num_msix_vec = 0;
3440                 adapter->num_msix_roce_vec = 0;
3441         }
3442 }
3443
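/* Enable MSI-X with enough vectors to cover the NIC queues (plus the RoCE
 * EQs when RoCE is supported), accepting anything down to MIN_MSIX_VECTORS.
 * When RoCE is supported and more than the minimum was granted, half of the
 * vectors are set aside for RoCE.  VFs cannot fall back to INTx, so only
 * for them is an enable failure returned to the caller.
 */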
3444 static int be_msix_enable(struct be_adapter *adapter)
3445 {
3446         unsigned int i, max_roce_eqs;
3447         struct device *dev = &adapter->pdev->dev;
3448         int num_vec;
3449
3450         /* If RoCE is supported, program the max number of vectors that
3451          * could be used for NIC and RoCE, else, just program the number
3452          * we'll use initially.
3453          */
3454         if (be_roce_supported(adapter)) {
3455                 max_roce_eqs =
3456                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3457                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3458                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3459         } else {
3460                 num_vec = max(adapter->cfg_num_rx_irqs,
3461                               adapter->cfg_num_tx_irqs);
3462         }
3463
3464         for (i = 0; i < num_vec; i++)
3465                 adapter->msix_entries[i].entry = i;
3466
3467         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3468                                         MIN_MSIX_VECTORS, num_vec);
3469         if (num_vec < 0)
3470                 goto fail;
3471
3472         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3473                 adapter->num_msix_roce_vec = num_vec / 2;
3474                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3475                          adapter->num_msix_roce_vec);
3476         }
3477
3478         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3479
3480         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3481                  adapter->num_msix_vec);
3482         return 0;
3483
3484 fail:
3485         dev_warn(dev, "MSIx enable failed\n");
3486
3487         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3488         if (be_virtfn(adapter))
3489                 return num_vec;
3490         return 0;
3491 }
3492
3493 static inline int be_msix_vec_get(struct be_adapter *adapter,
3494                                   struct be_eq_obj *eqo)
3495 {
3496         return adapter->msix_entries[eqo->msix_idx].vector;
3497 }
3498
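/* Request one IRQ per EQ (named "<netdev>-qN") and set its affinity hint to
 * the EQ's CPU mask; on failure, free the already-requested vectors and
 * disable MSI-X.
 */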
3499 static int be_msix_register(struct be_adapter *adapter)
3500 {
3501         struct net_device *netdev = adapter->netdev;
3502         struct be_eq_obj *eqo;
3503         int status, i, vec;
3504
3505         for_all_evt_queues(adapter, eqo, i) {
3506                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3507                 vec = be_msix_vec_get(adapter, eqo);
3508                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3509                 if (status)
3510                         goto err_msix;
3511
3512                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3513         }
3514
3515         return 0;
3516 err_msix:
3517         for (i--; i >= 0; i--) {
3518                 eqo = &adapter->eq_obj[i];
3519                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3520         }
3521         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3522                  status);
3523         be_msix_disable(adapter);
3524         return status;
3525 }
3526
3527 static int be_irq_register(struct be_adapter *adapter)
3528 {
3529         struct net_device *netdev = adapter->netdev;
3530         int status;
3531
3532         if (msix_enabled(adapter)) {
3533                 status = be_msix_register(adapter);
3534                 if (status == 0)
3535                         goto done;
3536                 /* INTx is not supported for VF */
3537                 if (be_virtfn(adapter))
3538                         return status;
3539         }
3540
3541         /* INTx: only the first EQ is used */
3542         netdev->irq = adapter->pdev->irq;
3543         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3544                              &adapter->eq_obj[0]);
3545         if (status) {
3546                 dev_err(&adapter->pdev->dev,
3547                         "INTx request IRQ failed - err %d\n", status);
3548                 return status;
3549         }
3550 done:
3551         adapter->isr_registered = true;
3552         return 0;
3553 }
3554
3555 static void be_irq_unregister(struct be_adapter *adapter)
3556 {
3557         struct net_device *netdev = adapter->netdev;
3558         struct be_eq_obj *eqo;
3559         int i, vec;
3560
3561         if (!adapter->isr_registered)
3562                 return;
3563
3564         /* INTx */
3565         if (!msix_enabled(adapter)) {
3566                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3567                 goto done;
3568         }
3569
3570         /* MSIx */
3571         for_all_evt_queues(adapter, eqo, i) {
3572                 vec = be_msix_vec_get(adapter, eqo);
3573                 irq_set_affinity_hint(vec, NULL);
3574                 free_irq(vec, eqo);
3575         }
3576
3577 done:
3578         adapter->isr_registered = false;
3579 }
3580
3581 static void be_rx_qs_destroy(struct be_adapter *adapter)
3582 {
3583         struct rss_info *rss = &adapter->rss_info;
3584         struct be_queue_info *q;
3585         struct be_rx_obj *rxo;
3586         int i;
3587
3588         for_all_rx_queues(adapter, rxo, i) {
3589                 q = &rxo->q;
3590                 if (q->created) {
3591                         /* If RXQs are destroyed while in an "out of buffer"
3592                          * state, there is a possibility of an HW stall on
3593                          * Lancer. So, post 64 buffers to each queue to relieve
3594                          * the "out of buffer" condition.
3595                          * Make sure there's space in the RXQ before posting.
3596                          */
3597                         if (lancer_chip(adapter)) {
3598                                 be_rx_cq_clean(rxo);
3599                                 if (atomic_read(&q->used) == 0)
3600                                         be_post_rx_frags(rxo, GFP_KERNEL,
3601                                                          MAX_RX_POST);
3602                         }
3603
3604                         be_cmd_rxq_destroy(adapter, q);
3605                         be_rx_cq_clean(rxo);
3606                         be_rxq_clean(rxo);
3607                 }
3608                 be_queue_free(adapter, q);
3609         }
3610
3611         if (rss->rss_flags) {
3612                 rss->rss_flags = RSS_ENABLE_NONE;
3613                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3614                                   128, rss->rss_hkey);
3615         }
3616 }
3617
3618 static void be_disable_if_filters(struct be_adapter *adapter)
3619 {
3620         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3621         be_clear_uc_list(adapter);
3622         be_clear_mc_list(adapter);
3623
3624         /* The IFACE flags are enabled in the open path and cleared
3625          * in the close path. When a VF gets detached from the host and
3626          * assigned to a VM the following happens:
3627          *      - VF's IFACE flags get cleared in the detach path
3628          *      - IFACE create is issued by the VF in the attach path
3629          * Due to a bug in the BE3/Skyhawk-R FW
3630          * (Lancer FW doesn't have the bug), the IFACE capability flags
3631          * specified along with the IFACE create cmd issued by a VF are not
3632          * honoured by FW.  As a consequence, if a *new* driver
3633          * (that enables/disables IFACE flags in open/close)
3634                  * is loaded in the host and an *old* driver is used by a VM/VF,
3635          * the IFACE gets created *without* the needed flags.
3636          * To avoid this, disable RX-filter flags only for Lancer.
3637          */
3638         if (lancer_chip(adapter)) {
3639                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3640                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3641         }
3642 }
3643
3644 static int be_close(struct net_device *netdev)
3645 {
3646         struct be_adapter *adapter = netdev_priv(netdev);
3647         struct be_eq_obj *eqo;
3648         int i;
3649
3650         /* This protection is needed as be_close() may be called even when the
3651          * adapter is in cleared state (after eeh perm failure)
3652          */
3653         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3654                 return 0;
3655
3656         /* Before attempting cleanup ensure all the pending cmds in the
3657          * config_wq have finished execution
3658          */
3659         flush_workqueue(be_wq);
3660
3661         be_disable_if_filters(adapter);
3662
3663         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3664                 for_all_evt_queues(adapter, eqo, i) {
3665                         napi_disable(&eqo->napi);
3666                         be_disable_busy_poll(eqo);
3667                 }
3668                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3669         }
3670
3671         be_async_mcc_disable(adapter);
3672
3673         /* Wait for all pending tx completions to arrive so that
3674          * all tx skbs are freed.
3675          */
3676         netif_tx_disable(netdev);
3677         be_tx_compl_clean(adapter);
3678
3679         be_rx_qs_destroy(adapter);
3680
3681         for_all_evt_queues(adapter, eqo, i) {
3682                 if (msix_enabled(adapter))
3683                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3684                 else
3685                         synchronize_irq(netdev->irq);
3686                 be_eq_clean(eqo);
3687         }
3688
3689         be_irq_unregister(adapter);
3690
3691         return 0;
3692 }
3693
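/* Create the RX queues: allocate the RX rings, create the default RXQ (if
 * needed) and the RSS RXQs in FW, program the RSS indirection table and a
 * random hash key when multiple rings exist, then post RX_Q_LEN - 1 buffers
 * to each ring.
 */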
3694 static int be_rx_qs_create(struct be_adapter *adapter)
3695 {
3696         struct rss_info *rss = &adapter->rss_info;
3697         u8 rss_key[RSS_HASH_KEY_LEN];
3698         struct be_rx_obj *rxo;
3699         int rc, i, j;
3700
3701         for_all_rx_queues(adapter, rxo, i) {
3702                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3703                                     sizeof(struct be_eth_rx_d));
3704                 if (rc)
3705                         return rc;
3706         }
3707
3708         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3709                 rxo = default_rxo(adapter);
3710                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3711                                        rx_frag_size, adapter->if_handle,
3712                                        false, &rxo->rss_id);
3713                 if (rc)
3714                         return rc;
3715         }
3716
3717         for_all_rss_queues(adapter, rxo, i) {
3718                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3719                                        rx_frag_size, adapter->if_handle,
3720                                        true, &rxo->rss_id);
3721                 if (rc)
3722                         return rc;
3723         }
3724
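        /* With multiple RSS rings, fill the RSS_INDIR_TABLE_LEN-entry
         * indirection table by cycling through the rings, so consecutive
         * entries point to consecutive rings (e.g. 0,1,2,3,0,1,... with
         * four rings), and program a freshly generated hash key.
         */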
3725         if (be_multi_rxq(adapter)) {
3726                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3727                         for_all_rss_queues(adapter, rxo, i) {
3728                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3729                                         break;
3730                                 rss->rsstable[j + i] = rxo->rss_id;
3731                                 rss->rss_queue[j + i] = i;
3732                         }
3733                 }
3734                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3735                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3736
3737                 if (!BEx_chip(adapter))
3738                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3739                                 RSS_ENABLE_UDP_IPV6;
3740
3741                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3742                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3743                                        RSS_INDIR_TABLE_LEN, rss_key);
3744                 if (rc) {
3745                         rss->rss_flags = RSS_ENABLE_NONE;
3746                         return rc;
3747                 }
3748
3749                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3750         } else {
3751                 /* Disable RSS, if only default RX Q is created */
3752                 rss->rss_flags = RSS_ENABLE_NONE;
3753         }
3754
3755
3756         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3757          * which is a queue empty condition
3758          */
3759         for_all_rx_queues(adapter, rxo, i)
3760                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3761
3762         return 0;
3763 }
3764
3765 static int be_enable_if_filters(struct be_adapter *adapter)
3766 {
3767         int status;
3768
3769         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3770         if (status)
3771                 return status;
3772
3773         /* For BE3 VFs, the PF programs the initial MAC address */
3774         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3775                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3776                 if (status)
3777                         return status;
3778                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3779         }
3780
3781         if (adapter->vlans_added)
3782                 be_vid_config(adapter);
3783
3784         __be_set_rx_mode(adapter);
3785
3786         return 0;
3787 }
3788
3789 static int be_open(struct net_device *netdev)
3790 {
3791         struct be_adapter *adapter = netdev_priv(netdev);
3792         struct be_eq_obj *eqo;
3793         struct be_rx_obj *rxo;
3794         struct be_tx_obj *txo;
3795         u8 link_status;
3796         int status, i;
3797
3798         status = be_rx_qs_create(adapter);
3799         if (status)
3800                 goto err;
3801
3802         status = be_enable_if_filters(adapter);
3803         if (status)
3804                 goto err;
3805
3806         status = be_irq_register(adapter);
3807         if (status)
3808                 goto err;
3809
3810         for_all_rx_queues(adapter, rxo, i)
3811                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3812
3813         for_all_tx_queues(adapter, txo, i)
3814                 be_cq_notify(adapter, txo->cq.id, true, 0);
3815
3816         be_async_mcc_enable(adapter);
3817
3818         for_all_evt_queues(adapter, eqo, i) {
3819                 napi_enable(&eqo->napi);
3820                 be_enable_busy_poll(eqo);
3821                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3822         }
3823         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3824
3825         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3826         if (!status)
3827                 be_link_status_update(adapter, link_status);
3828
3829         netif_tx_start_all_queues(netdev);
3830         if (skyhawk_chip(adapter))
3831                 udp_tunnel_get_rx_info(netdev);
3832
3833         return 0;
3834 err:
3835         be_close(adapter->netdev);
3836         return -EIO;
3837 }
3838
3839 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3840 {
3841         u32 addr;
3842
3843         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3844
3845         mac[5] = (u8)(addr & 0xFF);
3846         mac[4] = (u8)((addr >> 8) & 0xFF);
3847         mac[3] = (u8)((addr >> 16) & 0xFF);
3848         /* Use the OUI from the current MAC address */
3849         memcpy(mac, adapter->netdev->dev_addr, 3);
3850 }
3851
3852 /*
3853  * Generate a seed MAC address from the PF MAC Address using jhash.
3854  * MAC addresses for VFs are assigned incrementally starting from the seed.
3855  * These addresses are programmed in the ASIC by the PF and the VF driver
3856  * queries for the MAC address during its probe.
3857  */
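/* Example with made-up values: if the PF MAC is 00:90:fa:aa:bb:cc and
 * jhash() returns 0x00112233, the seed becomes 00:90:fa:11:22:33 (OUI kept,
 * low three bytes taken from the hash); VF0 gets the seed, VF1 gets
 * 00:90:fa:11:22:34, and so on, since only mac[5] is incremented per VF.
 */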
3858 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3859 {
3860         u32 vf;
3861         int status = 0;
3862         u8 mac[ETH_ALEN];
3863         struct be_vf_cfg *vf_cfg;
3864
3865         be_vf_eth_addr_generate(adapter, mac);
3866
3867         for_all_vfs(adapter, vf_cfg, vf) {
3868                 if (BEx_chip(adapter))
3869                         status = be_cmd_pmac_add(adapter, mac,
3870                                                  vf_cfg->if_handle,
3871                                                  &vf_cfg->pmac_id, vf + 1);
3872                 else
3873                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3874                                                 vf + 1);
3875
3876                 if (status)
3877                         dev_err(&adapter->pdev->dev,
3878                                 "Mac address assignment failed for VF %d\n",
3879                                 vf);
3880                 else
3881                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3882
3883                 mac[5] += 1;
3884         }
3885         return status;
3886 }
3887
3888 static int be_vfs_mac_query(struct be_adapter *adapter)
3889 {
3890         int status, vf;
3891         u8 mac[ETH_ALEN];
3892         struct be_vf_cfg *vf_cfg;
3893
3894         for_all_vfs(adapter, vf_cfg, vf) {
3895                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3896                                                mac, vf_cfg->if_handle,
3897                                                false, vf+1);
3898                 if (status)
3899                         return status;
3900                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3901         }
3902         return 0;
3903 }
3904
3905 static void be_vf_clear(struct be_adapter *adapter)
3906 {
3907         struct be_vf_cfg *vf_cfg;
3908         u32 vf;
3909
3910         if (pci_vfs_assigned(adapter->pdev)) {
3911                 dev_warn(&adapter->pdev->dev,
3912                          "VFs are assigned to VMs: not disabling VFs\n");
3913                 goto done;
3914         }
3915
3916         pci_disable_sriov(adapter->pdev);
3917
3918         for_all_vfs(adapter, vf_cfg, vf) {
3919                 if (BEx_chip(adapter))
3920                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3921                                         vf_cfg->pmac_id, vf + 1);
3922                 else
3923                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3924                                        vf + 1);
3925
3926                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3927         }
3928
3929         if (BE3_chip(adapter))
3930                 be_cmd_set_hsw_config(adapter, 0, 0,
3931                                       adapter->if_handle,
3932                                       PORT_FWD_TYPE_PASSTHRU, 0);
3933 done:
3934         kfree(adapter->vf_cfg);
3935         adapter->num_vfs = 0;
3936         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3937 }
3938
3939 static void be_clear_queues(struct be_adapter *adapter)
3940 {
3941         be_mcc_queues_destroy(adapter);
3942         be_rx_cqs_destroy(adapter);
3943         be_tx_queues_destroy(adapter);
3944         be_evt_queues_destroy(adapter);
3945 }
3946
3947 static void be_cancel_worker(struct be_adapter *adapter)
3948 {
3949         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3950                 cancel_delayed_work_sync(&adapter->work);
3951                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3952         }
3953 }
3954
3955 static void be_cancel_err_detection(struct be_adapter *adapter)
3956 {
3957         struct be_error_recovery *err_rec = &adapter->error_recovery;
3958
3959         if (!be_err_recovery_workq)
3960                 return;
3961
3962         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3963                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3964                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3965         }
3966 }
3967
3968 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3969 {
3970         struct net_device *netdev = adapter->netdev;
3971
3972         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3973                 be_cmd_manage_iface(adapter, adapter->if_handle,
3974                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3975
3976         if (adapter->vxlan_port)
3977                 be_cmd_set_vxlan_port(adapter, 0);
3978
3979         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3980         adapter->vxlan_port = 0;
3981
3982         netdev->hw_enc_features = 0;
3983         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3984         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3985 }
3986
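/* Work out the resources FW should provision for each VF: RX/RSS queues are
 * split evenly among the PF and its VFs, capped at SH_VF_MAX_NIC_EQS and
 * dropped to a single RX queue once the VF count exhausts the port's RSS
 * table budget.  For example, with 32 RSS-capable queues and 7 VFs, each of
 * the 8 functions is offered 32 / 8 = 4 queues before the caps apply.
 * TXQs, CQs and, where FW marks the field modifiable, unicast MACs, VLANs,
 * IFACEs and MCCQs are likewise divided by (num_vfs + 1).
 */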
3987 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3988                                 struct be_resources *vft_res)
3989 {
3990         struct be_resources res = adapter->pool_res;
3991         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3992         struct be_resources res_mod = {0};
3993         u16 num_vf_qs = 1;
3994
3995         /* Distribute the queue resources among the PF and its VFs */
3996         if (num_vfs) {
3997                 /* Divide the rx queues evenly among the VFs and the PF, capped
3998                  * at VF-EQ-count. Any remainder queues belong to the PF.
3999                  */
4000                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4001                                 res.max_rss_qs / (num_vfs + 1));
4002
4003                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4004                  * RSS Tables per port. Provide RSS on VFs, only if number of
4005                  * VFs requested is less than its PF Pool's RSS Tables limit.
4006                  */
4007                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4008                         num_vf_qs = 1;
4009         }
4010
4011         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4012          * which are modifiable using SET_PROFILE_CONFIG cmd.
4013          */
4014         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4015                                   RESOURCE_MODIFIABLE, 0);
4016
4017         /* If RSS IFACE capability flags are modifiable for a VF, set the
4018          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4019          * more than 1 RSSQ is available for a VF.
4020          * Otherwise, provision only 1 queue pair for VF.
4021          */
4022         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4023                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4024                 if (num_vf_qs > 1) {
4025                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4026                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4027                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4028                 } else {
4029                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4030                                              BE_IF_FLAGS_DEFQ_RSS);
4031                 }
4032         } else {
4033                 num_vf_qs = 1;
4034         }
4035
4036         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4037                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4038                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4039         }
4040
4041         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4042         vft_res->max_rx_qs = num_vf_qs;
4043         vft_res->max_rss_qs = num_vf_qs;
4044         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4045         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4046
4047         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4048          * among the PF and its VFs, if the fields are changeable
4049          */
4050         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4051                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4052
4053         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4054                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4055
4056         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4057                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4058
4059         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4060                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4061 }
4062
4063 static void be_if_destroy(struct be_adapter *adapter)
4064 {
4065         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4066
4067         kfree(adapter->pmac_id);
4068         adapter->pmac_id = NULL;
4069
4070         kfree(adapter->mc_list);
4071         adapter->mc_list = NULL;
4072
4073         kfree(adapter->uc_list);
4074         adapter->uc_list = NULL;
4075 }
4076
4077 static int be_clear(struct be_adapter *adapter)
4078 {
4079         struct pci_dev *pdev = adapter->pdev;
4080         struct be_resources vft_res = {0};
4081
4082         be_cancel_worker(adapter);
4083
4084         flush_workqueue(be_wq);
4085
4086         if (sriov_enabled(adapter))
4087                 be_vf_clear(adapter);
4088
4089         /* Re-configure FW to distribute resources evenly across max-supported
4090          * number of VFs, only when VFs are not already enabled.
4091          */
4092         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4093             !pci_vfs_assigned(pdev)) {
4094                 be_calculate_vf_res(adapter,
4095                                     pci_sriov_get_totalvfs(pdev),
4096                                     &vft_res);
4097                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4098                                         pci_sriov_get_totalvfs(pdev),
4099                                         &vft_res);
4100         }
4101
4102         be_disable_vxlan_offloads(adapter);
4103
4104         be_if_destroy(adapter);
4105
4106         be_clear_queues(adapter);
4107
4108         be_msix_disable(adapter);
4109         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4110         return 0;
4111 }
4112
4113 static int be_vfs_if_create(struct be_adapter *adapter)
4114 {
4115         struct be_resources res = {0};
4116         u32 cap_flags, en_flags, vf;
4117         struct be_vf_cfg *vf_cfg;
4118         int status;
4119
4120         /* If a FW profile exists, then cap_flags are updated */
4121         cap_flags = BE_VF_IF_EN_FLAGS;
4122
4123         for_all_vfs(adapter, vf_cfg, vf) {
4124                 if (!BE3_chip(adapter)) {
4125                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4126                                                            ACTIVE_PROFILE_TYPE,
4127                                                            RESOURCE_LIMITS,
4128                                                            vf + 1);
4129                         if (!status) {
4130                                 cap_flags = res.if_cap_flags;
4131                                 /* Prevent VFs from enabling VLAN promiscuous
4132                                  * mode
4133                                  */
4134                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4135                         }
4136                 }
4137
4138                 /* PF should enable IF flags during proxy if_create call */
4139                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4140                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4141                                           &vf_cfg->if_handle, vf + 1);
4142                 if (status)
4143                         return status;
4144         }
4145
4146         return 0;
4147 }
4148
4149 static int be_vf_setup_init(struct be_adapter *adapter)
4150 {
4151         struct be_vf_cfg *vf_cfg;
4152         int vf;
4153
4154         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4155                                   GFP_KERNEL);
4156         if (!adapter->vf_cfg)
4157                 return -ENOMEM;
4158
4159         for_all_vfs(adapter, vf_cfg, vf) {
4160                 vf_cfg->if_handle = -1;
4161                 vf_cfg->pmac_id = -1;
4162         }
4163         return 0;
4164 }
4165
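/* Bring up SR-IOV: reuse the IFACEs and MACs of already-enabled VFs, or
 * create per-VF IFACEs and program jhash-derived MAC addresses; grant the
 * FILTMGMT privilege so VFs can program their own MAC/VLAN filters, allow
 * full bandwidth, fetch the spoof-check setting, then enable SR-IOV in PCI
 * and, on BE3, switch the port to VEB forwarding.
 */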
4166 static int be_vf_setup(struct be_adapter *adapter)
4167 {
4168         struct device *dev = &adapter->pdev->dev;
4169         struct be_vf_cfg *vf_cfg;
4170         int status, old_vfs, vf;
4171         bool spoofchk;
4172
4173         old_vfs = pci_num_vf(adapter->pdev);
4174
4175         status = be_vf_setup_init(adapter);
4176         if (status)
4177                 goto err;
4178
4179         if (old_vfs) {
4180                 for_all_vfs(adapter, vf_cfg, vf) {
4181                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4182                         if (status)
4183                                 goto err;
4184                 }
4185
4186                 status = be_vfs_mac_query(adapter);
4187                 if (status)
4188                         goto err;
4189         } else {
4190                 status = be_vfs_if_create(adapter);
4191                 if (status)
4192                         goto err;
4193
4194                 status = be_vf_eth_addr_config(adapter);
4195                 if (status)
4196                         goto err;
4197         }
4198
4199         for_all_vfs(adapter, vf_cfg, vf) {
4200                 /* Allow VFs to program MAC/VLAN filters */
4201                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4202                                                   vf + 1);
4203                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4204                         status = be_cmd_set_fn_privileges(adapter,
4205                                                           vf_cfg->privileges |
4206                                                           BE_PRIV_FILTMGMT,
4207                                                           vf + 1);
4208                         if (!status) {
4209                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4210                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4211                                          vf);
4212                         }
4213                 }
4214
4215                 /* Allow full available bandwidth */
4216                 if (!old_vfs)
4217                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4218
4219                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4220                                                vf_cfg->if_handle, NULL,
4221                                                &spoofchk);
4222                 if (!status)
4223                         vf_cfg->spoofchk = spoofchk;
4224
4225                 if (!old_vfs) {
4226                         be_cmd_enable_vf(adapter, vf + 1);
4227                         be_cmd_set_logical_link_config(adapter,
4228                                                        IFLA_VF_LINK_STATE_AUTO,
4229                                                        vf+1);
4230                 }
4231         }
4232
4233         if (!old_vfs) {
4234                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4235                 if (status) {
4236                         dev_err(dev, "SRIOV enable failed\n");
4237                         adapter->num_vfs = 0;
4238                         goto err;
4239                 }
4240         }
4241
4242         if (BE3_chip(adapter)) {
4243                 /* On BE3, enable VEB only when SRIOV is enabled */
4244                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4245                                                adapter->if_handle,
4246                                                PORT_FWD_TYPE_VEB, 0);
4247                 if (status)
4248                         goto err;
4249         }
4250
4251         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4252         return 0;
4253 err:
4254         dev_err(dev, "VF setup failed\n");
4255         be_vf_clear(adapter);
4256         return status;
4257 }
4258
4259 /* Converting function_mode bits on BE3 to SH mc_type enums */
4260
4261 static u8 be_convert_mc_type(u32 function_mode)
4262 {
4263         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4264                 return vNIC1;
4265         else if (function_mode & QNQ_MODE)
4266                 return FLEX10;
4267         else if (function_mode & VNIC_MODE)
4268                 return vNIC2;
4269         else if (function_mode & UMC_ENABLED)
4270                 return UMC;
4271         else
4272                 return MC_NONE;
4273 }
4274
4275 /* On BE2/BE3 FW does not suggest the supported limits */
4276 static void BEx_get_resources(struct be_adapter *adapter,
4277                               struct be_resources *res)
4278 {
4279         bool use_sriov = adapter->num_vfs ? true : false;
4280
4281         if (be_physfn(adapter))
4282                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4283         else
4284                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4285
4286         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4287
4288         if (be_is_mc(adapter)) {
4289                 /* Assume that there are 4 channels per port
4290                  * when multi-channel is enabled.
4291                  */
4292                 if (be_is_qnq_mode(adapter))
4293                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4294                 else
4295                         /* In a non-qnq multichannel mode, the pvid
4296                          * takes up one vlan entry
4297                          */
4298                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4299         } else {
4300                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4301         }
4302
4303         res->max_mcast_mac = BE_MAX_MC;
4304
4305         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4306          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4307          *    *only* if it is RSS-capable.
4308          */
4309         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4310             be_virtfn(adapter) ||
4311             (be_is_mc(adapter) &&
4312              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4313                 res->max_tx_qs = 1;
4314         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4315                 struct be_resources super_nic_res = {0};
4316
4317                 /* On a SuperNIC profile, the driver needs to use the
4318                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4319                  */
4320                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4321                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4322                                           0);
4323                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4324                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4325         } else {
4326                 res->max_tx_qs = BE3_MAX_TX_QS;
4327         }
4328
4329         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4330             !use_sriov && be_physfn(adapter))
4331                 res->max_rss_qs = (adapter->be3_native) ?
4332                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4333         res->max_rx_qs = res->max_rss_qs + 1;
4334
4335         if (be_physfn(adapter))
4336                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4337                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4338         else
4339                 res->max_evt_qs = 1;
4340
4341         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4342         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4343         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4344                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4345 }
4346
4347 static void be_setup_init(struct be_adapter *adapter)
4348 {
4349         adapter->vlan_prio_bmap = 0xff;
4350         adapter->phy.link_speed = -1;
4351         adapter->if_handle = -1;
4352         adapter->be3_native = false;
4353         adapter->if_flags = 0;
4354         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4355         if (be_physfn(adapter))
4356                 adapter->cmd_privileges = MAX_PRIVILEGES;
4357         else
4358                 adapter->cmd_privileges = MIN_PRIVILEGES;
4359 }
4360
4361 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4362  * However, this HW limitation is not exposed to the host via any SLI cmd.
4363  * As a result, in the case of SRIOV, and in particular in multi-partition
4364  * configs, the driver needs to calculate a proportional share of RSS Tables
4365  * per PF-pool for distribution among the VFs. This self-imposed limit
4366  * determines the number of VFs for which RSS can be enabled.
4367  */
4368 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4369 {
4370         struct be_port_resources port_res = {0};
4371         u8 rss_tables_on_port;
4372         u16 max_vfs = be_max_vfs(adapter);
4373
4374         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4375                                   RESOURCE_LIMITS, 0);
4376
4377         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4378
4379         /* Each PF Pool's RSS Tables limit =
4380          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4381          */
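        /* Illustrative example (not actual limits): with 14 RSS tables left
         * on the port, 32 max VFs on this PF and 64 total VFs on the port,
         * this PF-pool gets 32 * 14 / 64 = 7 RSS tables.
         */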
4382         adapter->pool_res.max_rss_tables =
4383                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4384 }
4385
4386 static int be_get_sriov_config(struct be_adapter *adapter)
4387 {
4388         struct be_resources res = {0};
4389         int max_vfs, old_vfs;
4390
4391         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4392                                   RESOURCE_LIMITS, 0);
4393
4394         /* Some old versions of BE3 FW don't report max_vfs value */
4395         if (BE3_chip(adapter) && !res.max_vfs) {
4396                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4397                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4398         }
4399
4400         adapter->pool_res = res;
4401
4402         /* If the VFs were not disabled during the previous driver unload,
4403          * then we cannot rely on the PF-pool limits for the TotalVFs value.
4404          * Instead, use the TotalVFs value stored in the pci-dev struct.
4405          */
4406         old_vfs = pci_num_vf(adapter->pdev);
4407         if (old_vfs) {
4408                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4409                          old_vfs);
4410
4411                 adapter->pool_res.max_vfs =
4412                         pci_sriov_get_totalvfs(adapter->pdev);
4413                 adapter->num_vfs = old_vfs;
4414         }
4415
4416         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4417                 be_calculate_pf_pool_rss_tables(adapter);
4418                 dev_info(&adapter->pdev->dev,
4419                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4420                          be_max_pf_pool_rss_tables(adapter));
4421         }
4422         return 0;
4423 }
4424
4425 static void be_alloc_sriov_res(struct be_adapter *adapter)
4426 {
4427         int old_vfs = pci_num_vf(adapter->pdev);
4428         struct be_resources vft_res = {0};
4429         int status;
4430
4431         be_get_sriov_config(adapter);
4432
4433         if (!old_vfs)
4434                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4435
4436         /* When the HW is in an SRIOV capable configuration, the PF-pool
4437          * resources are given to the PF during driver load, if there are no
4438          * old VFs. This facility is not available in BE3 FW; on Lancer,
4439          * this is done by the FW itself.
4440          */
4441         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4442                 be_calculate_vf_res(adapter, 0, &vft_res);
4443                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4444                                                  &vft_res);
4445                 if (status)
4446                         dev_err(&adapter->pdev->dev,
4447                                 "Failed to optimize SRIOV resources\n");
4448         }
4449 }
4450
4451 static int be_get_resources(struct be_adapter *adapter)
4452 {
4453         struct device *dev = &adapter->pdev->dev;
4454         struct be_resources res = {0};
4455         int status;
4456
4457         /* For Lancer, SH, etc. read per-function resource limits from FW.
4458          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4459          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4460          */
4461         if (BEx_chip(adapter)) {
4462                 BEx_get_resources(adapter, &res);
4463         } else {
4464                 status = be_cmd_get_func_config(adapter, &res);
4465                 if (status)
4466                         return status;
4467
4468                 /* If a default RXQ must be created, we'll use up one RSSQ */
4469                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4470                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4471                         res.max_rss_qs -= 1;
4472         }
4473
4474         /* If RoCE is supported stash away half the EQs for RoCE */
4475         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4476                                 res.max_evt_qs / 2 : res.max_evt_qs;
4477         adapter->res = res;
4478
4479         /* If FW supports RSS default queue, then skip creating non-RSS
4480          * queue for non-IP traffic.
4481          */
4482         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4483                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4484
4485         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4486                  be_max_txqs(adapter), be_max_rxqs(adapter),
4487                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4488                  be_max_vfs(adapter));
4489         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4490                  be_max_uc(adapter), be_max_mc(adapter),
4491                  be_max_vlans(adapter));
4492
4493         /* Ensure RX and TX queues are created in pairs at init time */
4494         adapter->cfg_num_rx_irqs =
4495                                 min_t(u16, netif_get_num_default_rss_queues(),
4496                                       be_max_qp_irqs(adapter));
4497         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4498         return 0;
4499 }
4500
4501 static int be_get_config(struct be_adapter *adapter)
4502 {
4503         int status, level;
4504         u16 profile_id;
4505
4506         status = be_cmd_get_cntl_attributes(adapter);
4507         if (status)
4508                 return status;
4509
4510         status = be_cmd_query_fw_cfg(adapter);
4511         if (status)
4512                 return status;
4513
4514         if (!lancer_chip(adapter) && be_physfn(adapter))
4515                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4516
4517         if (BEx_chip(adapter)) {
4518                 level = be_cmd_get_fw_log_level(adapter);
4519                 adapter->msg_enable =
4520                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4521         }
4522
4523         be_cmd_get_acpi_wol_cap(adapter);
4524         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4525         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4526
4527         be_cmd_query_port_name(adapter);
4528
4529         if (be_physfn(adapter)) {
4530                 status = be_cmd_get_active_profile(adapter, &profile_id);
4531                 if (!status)
4532                         dev_info(&adapter->pdev->dev,
4533                                  "Using profile 0x%x\n", profile_id);
4534         }
4535
4536         return 0;
4537 }
4538
4539 static int be_mac_setup(struct be_adapter *adapter)
4540 {
4541         u8 mac[ETH_ALEN];
4542         int status;
4543
4544         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4545                 status = be_cmd_get_perm_mac(adapter, mac);
4546                 if (status)
4547                         return status;
4548
4549                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4550                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4551         }
4552
4553         return 0;
4554 }
4555
4556 static void be_schedule_worker(struct be_adapter *adapter)
4557 {
4558         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4559         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4560 }
4561
4562 static void be_destroy_err_recovery_workq(void)
4563 {
4564         if (!be_err_recovery_workq)
4565                 return;
4566
4567         flush_workqueue(be_err_recovery_workq);
4568         destroy_workqueue(be_err_recovery_workq);
4569         be_err_recovery_workq = NULL;
4570 }
4571
4572 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4573 {
4574         struct be_error_recovery *err_rec = &adapter->error_recovery;
4575
4576         if (!be_err_recovery_workq)
4577                 return;
4578
4579         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4580                            msecs_to_jiffies(delay));
4581         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4582 }
4583
4584 static int be_setup_queues(struct be_adapter *adapter)
4585 {
4586         struct net_device *netdev = adapter->netdev;
4587         int status;
4588
4589         status = be_evt_queues_create(adapter);
4590         if (status)
4591                 goto err;
4592
4593         status = be_tx_qs_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = be_rx_cqs_create(adapter);
4598         if (status)
4599                 goto err;
4600
4601         status = be_mcc_queues_create(adapter);
4602         if (status)
4603                 goto err;
4604
4605         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4606         if (status)
4607                 goto err;
4608
4609         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4610         if (status)
4611                 goto err;
4612
4613         return 0;
4614 err:
4615         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4616         return status;
4617 }
4618
4619 static int be_if_create(struct be_adapter *adapter)
4620 {
4621         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4622         u32 cap_flags = be_if_cap_flags(adapter);
4623         int status;
4624
4625         /* alloc required memory for other filtering fields */
4626         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4627                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4628         if (!adapter->pmac_id)
4629                 return -ENOMEM;
4630
4631         adapter->mc_list = kcalloc(be_max_mc(adapter),
4632                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4633         if (!adapter->mc_list)
4634                 return -ENOMEM;
4635
4636         adapter->uc_list = kcalloc(be_max_uc(adapter),
4637                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4638         if (!adapter->uc_list)
4639                 return -ENOMEM;
4640
4641         if (adapter->cfg_num_rx_irqs == 1)
4642                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4643
4644         en_flags &= cap_flags;
4645         /* will enable all the needed filter flags in be_open() */
4646         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4647                                   &adapter->if_handle, 0);
4648
4649         if (status)
4650                 return status;
4651
4652         return 0;
4653 }
4654
4655 int be_update_queues(struct be_adapter *adapter)
4656 {
4657         struct net_device *netdev = adapter->netdev;
4658         int status;
4659
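        /* Tear down and re-create the interface and all queues: close the
         * netdev if it is running, stop the worker, drop MSI-X vectors
         * (unless they are shared with RoCE), destroy the queues and the
         * interface object, then re-create everything in reverse order and
         * re-open the netdev.
         */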
4660         if (netif_running(netdev))
4661                 be_close(netdev);
4662
4663         be_cancel_worker(adapter);
4664
4665         /* If any vectors have been shared with RoCE we cannot re-program
4666          * the MSIx table.
4667          */
4668         if (!adapter->num_msix_roce_vec)
4669                 be_msix_disable(adapter);
4670
4671         be_clear_queues(adapter);
4672         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4673         if (status)
4674                 return status;
4675
4676         if (!msix_enabled(adapter)) {
4677                 status = be_msix_enable(adapter);
4678                 if (status)
4679                         return status;
4680         }
4681
4682         status = be_if_create(adapter);
4683         if (status)
4684                 return status;
4685
4686         status = be_setup_queues(adapter);
4687         if (status)
4688                 return status;
4689
4690         be_schedule_worker(adapter);
4691
4692         if (netif_running(netdev))
4693                 status = be_open(netdev);
4694
4695         return status;
4696 }
4697
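/* Parse the leading (major) number from a firmware version string of the
 * form "<major>.<minor>..."; returns 0 if the string cannot be parsed.
 */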
4698 static inline int fw_major_num(const char *fw_ver)
4699 {
4700         int fw_major = 0, i;
4701
4702         i = sscanf(fw_ver, "%d.", &fw_major);
4703         if (i != 1)
4704                 return 0;
4705
4706         return fw_major;
4707 }
4708
4709 /* If it is error recovery, FLR the PF.
4710  * Else, if any VFs are already enabled, don't FLR the PF.
4711  */
4712 static bool be_reset_required(struct be_adapter *adapter)
4713 {
4714         if (be_error_recovering(adapter))
4715                 return true;
4716         else
4717                 return pci_num_vf(adapter->pdev) == 0;
4718 }
4719
4720 /* Wait for the FW to be ready and perform the required initialization */
4721 static int be_func_init(struct be_adapter *adapter)
4722 {
4723         int status;
4724
4725         status = be_fw_wait_ready(adapter);
4726         if (status)
4727                 return status;
4728
4729         /* FW is now ready; clear errors to allow cmds/doorbell */
4730         be_clear_error(adapter, BE_CLEAR_ALL);
4731
4732         if (be_reset_required(adapter)) {
4733                 status = be_cmd_reset_function(adapter);
4734                 if (status)
4735                         return status;
4736
4737                 /* Wait for interrupts to quiesce after an FLR */
4738                 msleep(100);
4739         }
4740
4741         /* Tell FW we're ready to fire cmds */
4742         status = be_cmd_fw_init(adapter);
4743         if (status)
4744                 return status;
4745
4746         /* Allow interrupts for other ULPs running on NIC function */
4747         be_intr_set(adapter, true);
4748
4749         return 0;
4750 }
4751
4752 static int be_setup(struct be_adapter *adapter)
4753 {
4754         struct device *dev = &adapter->pdev->dev;
4755         int status;
4756
4757         status = be_func_init(adapter);
4758         if (status)
4759                 return status;
4760
4761         be_setup_init(adapter);
4762
4763         if (!lancer_chip(adapter))
4764                 be_cmd_req_native_mode(adapter);
4765
4766         /* invoke this cmd first to get pf_num and vf_num which are needed
4767          * for issuing profile related cmds
4768          */
4769         if (!BEx_chip(adapter)) {
4770                 status = be_cmd_get_func_config(adapter, NULL);
4771                 if (status)
4772                         return status;
4773         }
4774
4775         status = be_get_config(adapter);
4776         if (status)
4777                 goto err;
4778
4779         if (!BE2_chip(adapter) && be_physfn(adapter))
4780                 be_alloc_sriov_res(adapter);
4781
4782         status = be_get_resources(adapter);
4783         if (status)
4784                 goto err;
4785
4786         status = be_msix_enable(adapter);
4787         if (status)
4788                 goto err;
4789
4790         /* will enable all the needed filter flags in be_open() */
4791         status = be_if_create(adapter);
4792         if (status)
4793                 goto err;
4794
4795         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4796         rtnl_lock();
4797         status = be_setup_queues(adapter);
4798         rtnl_unlock();
4799         if (status)
4800                 goto err;
4801
4802         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4803
4804         status = be_mac_setup(adapter);
4805         if (status)
4806                 goto err;
4807
4808         be_cmd_get_fw_ver(adapter);
4809         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4810
4811         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4812                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4813                         adapter->fw_ver);
4814                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4815         }
4816
4817         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4818                                          adapter->rx_fc);
4819         if (status)
4820                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4821                                         &adapter->rx_fc);
4822
4823         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4824                  adapter->tx_fc, adapter->rx_fc);
4825
4826         if (be_physfn(adapter))
4827                 be_cmd_set_logical_link_config(adapter,
4828                                                IFLA_VF_LINK_STATE_AUTO, 0);
4829
4830         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4831          * confusing a Linux bridge or OVS that it might be connected to.
4832          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4833          * when SRIOV is not enabled.
4834          */
4835         if (BE3_chip(adapter))
4836                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4837                                       PORT_FWD_TYPE_PASSTHRU, 0);
4838
4839         if (adapter->num_vfs)
4840                 be_vf_setup(adapter);
4841
4842         status = be_cmd_get_phy_info(adapter);
4843         if (!status && be_pause_supported(adapter))
4844                 adapter->phy.fc_autoneg = 1;
4845
4846         if (be_physfn(adapter) && !lancer_chip(adapter))
4847                 be_cmd_set_features(adapter);
4848
4849         be_schedule_worker(adapter);
4850         adapter->flags |= BE_FLAGS_SETUP_DONE;
4851         return 0;
4852 err:
4853         be_clear(adapter);
4854         return status;
4855 }
4856
4857 #ifdef CONFIG_NET_POLL_CONTROLLER
4858 static void be_netpoll(struct net_device *netdev)
4859 {
4860         struct be_adapter *adapter = netdev_priv(netdev);
4861         struct be_eq_obj *eqo;
4862         int i;
4863
4864         for_all_evt_queues(adapter, eqo, i) {
4865                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4866                 napi_schedule(&eqo->napi);
4867         }
4868 }
4869 #endif
4870
4871 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4872 {
4873         const struct firmware *fw;
4874         int status;
4875
4876         if (!netif_running(adapter->netdev)) {
4877                 dev_err(&adapter->pdev->dev,
4878                         "Firmware load not allowed (interface is down)\n");
4879                 return -ENETDOWN;
4880         }
4881
4882         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4883         if (status)
4884                 goto fw_exit;
4885
4886         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4887
4888         if (lancer_chip(adapter))
4889                 status = lancer_fw_download(adapter, fw);
4890         else
4891                 status = be_fw_download(adapter, fw);
4892
4893         if (!status)
4894                 be_cmd_get_fw_ver(adapter);
4895
4896 fw_exit:
4897         release_firmware(fw);
4898         return status;
4899 }
4900
4901 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4902                                  u16 flags)
4903 {
4904         struct be_adapter *adapter = netdev_priv(dev);
4905         struct nlattr *attr, *br_spec;
4906         int rem;
4907         int status = 0;
4908         u16 mode = 0;
4909
4910         if (!sriov_enabled(adapter))
4911                 return -EOPNOTSUPP;
4912
4913         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4914         if (!br_spec)
4915                 return -EINVAL;
4916
4917         nla_for_each_nested(attr, br_spec, rem) {
4918                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4919                         continue;
4920
4921                 if (nla_len(attr) < sizeof(mode))
4922                         return -EINVAL;
4923
4924                 mode = nla_get_u16(attr);
4925                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4926                         return -EOPNOTSUPP;
4927
4928                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4929                         return -EINVAL;
4930
4931                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4932                                                adapter->if_handle,
4933                                                mode == BRIDGE_MODE_VEPA ?
4934                                                PORT_FWD_TYPE_VEPA :
4935                                                PORT_FWD_TYPE_VEB, 0);
4936                 if (status)
4937                         goto err;
4938
4939                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4940                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4941
4942                 return status;
4943         }
4944 err:
4945         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4946                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4947
4948         return status;
4949 }
4950
4951 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4952                                  struct net_device *dev, u32 filter_mask,
4953                                  int nlflags)
4954 {
4955         struct be_adapter *adapter = netdev_priv(dev);
4956         int status = 0;
4957         u8 hsw_mode;
4958
4959         /* BE and Lancer chips support VEB mode only */
4960         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4961                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4962                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4963                         return 0;
4964                 hsw_mode = PORT_FWD_TYPE_VEB;
4965         } else {
4966                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4967                                                adapter->if_handle, &hsw_mode,
4968                                                NULL);
4969                 if (status)
4970                         return 0;
4971
4972                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4973                         return 0;
4974         }
4975
4976         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4977                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4978                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4979                                        0, 0, nlflags, filter_mask, NULL);
4980 }
4981
4982 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4983                                          void (*func)(struct work_struct *))
4984 {
4985         struct be_cmd_work *work;
4986
4987         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4988         if (!work) {
4989                 dev_err(&adapter->pdev->dev,
4990                         "be_work memory allocation failed\n");
4991                 return NULL;
4992         }
4993
4994         INIT_WORK(&work->work, func);
4995         work->adapter = adapter;
4996         return work;
4997 }
4998
4999 /* VxLAN offload Notes:
5000  *
5001  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5002  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5003  * is expected to work across all types of IP tunnels once exported. Skyhawk
5004  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5005  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5006  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5007  * those other tunnels are unexported on the fly through ndo_features_check().
5008  *
5009  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5010  * adds more than one port, disable offloads and don't re-enable them again
5011  * until after all the tunnels are removed.
5012  */
5013 static void be_work_add_vxlan_port(struct work_struct *work)
5014 {
5015         struct be_cmd_work *cmd_work =
5016                                 container_of(work, struct be_cmd_work, work);
5017         struct be_adapter *adapter = cmd_work->adapter;
5018         struct net_device *netdev = adapter->netdev;
5019         struct device *dev = &adapter->pdev->dev;
5020         __be16 port = cmd_work->info.vxlan_port;
5021         int status;
5022
5023         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5024                 adapter->vxlan_port_aliases++;
5025                 goto done;
5026         }
5027
5028         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5029                 dev_info(dev,
5030                          "Only one UDP port supported for VxLAN offloads\n");
5031                 dev_info(dev, "Disabling VxLAN offloads\n");
5032                 adapter->vxlan_port_count++;
5033                 goto err;
5034         }
5035
5036         if (adapter->vxlan_port_count++ >= 1)
5037                 goto done;
5038
5039         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5040                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5041         if (status) {
5042                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5043                 goto err;
5044         }
5045
5046         status = be_cmd_set_vxlan_port(adapter, port);
5047         if (status) {
5048                 dev_warn(dev, "Failed to add VxLAN port\n");
5049                 goto err;
5050         }
5051         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5052         adapter->vxlan_port = port;
5053
5054         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5055                                    NETIF_F_TSO | NETIF_F_TSO6 |
5056                                    NETIF_F_GSO_UDP_TUNNEL;
5057         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5058         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5059
5060         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5061                  be16_to_cpu(port));
5062         goto done;
5063 err:
5064         be_disable_vxlan_offloads(adapter);
5065 done:
5066         kfree(cmd_work);
5067 }
5068
5069 static void be_work_del_vxlan_port(struct work_struct *work)
5070 {
5071         struct be_cmd_work *cmd_work =
5072                                 container_of(work, struct be_cmd_work, work);
5073         struct be_adapter *adapter = cmd_work->adapter;
5074         __be16 port = cmd_work->info.vxlan_port;
5075
5076         if (adapter->vxlan_port != port)
5077                 goto done;
5078
5079         if (adapter->vxlan_port_aliases) {
5080                 adapter->vxlan_port_aliases--;
5081                 goto out;
5082         }
5083
5084         be_disable_vxlan_offloads(adapter);
5085
5086         dev_info(&adapter->pdev->dev,
5087                  "Disabled VxLAN offloads for UDP port %d\n",
5088                  be16_to_cpu(port));
5089 done:
5090         adapter->vxlan_port_count--;
5091 out:
5092         kfree(cmd_work);
5093 }
5094
5095 static void be_cfg_vxlan_port(struct net_device *netdev,
5096                               struct udp_tunnel_info *ti,
5097                               void (*func)(struct work_struct *))
5098 {
5099         struct be_adapter *adapter = netdev_priv(netdev);
5100         struct be_cmd_work *cmd_work;
5101
5102         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5103                 return;
5104
5105         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5106                 return;
5107
5108         cmd_work = be_alloc_work(adapter, func);
5109         if (cmd_work) {
5110                 cmd_work->info.vxlan_port = ti->port;
5111                 queue_work(be_wq, &cmd_work->work);
5112         }
5113 }
5114
5115 static void be_del_vxlan_port(struct net_device *netdev,
5116                               struct udp_tunnel_info *ti)
5117 {
5118         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5119 }
5120
5121 static void be_add_vxlan_port(struct net_device *netdev,
5122                               struct udp_tunnel_info *ti)
5123 {
5124         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5125 }
5126
5127 static netdev_features_t be_features_check(struct sk_buff *skb,
5128                                            struct net_device *dev,
5129                                            netdev_features_t features)
5130 {
5131         struct be_adapter *adapter = netdev_priv(dev);
5132         u8 l4_hdr = 0;
5133
5134         /* The code below restricts offload features for some tunneled packets.
5135          * Offload features for normal (non tunnel) packets are unchanged.
5136          */
5137         if (!skb->encapsulation ||
5138             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5139                 return features;
5140
5141         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5142          * should disable tunnel offload features if it's not a VxLAN packet,
5143          * as tunnel offloads have been enabled only for VxLAN. This is done to
5144          * allow other tunneled traffic like GRE to work fine while VxLAN
5145          * offloads are configured in Skyhawk-R.
5146          */
5147         switch (vlan_get_protocol(skb)) {
5148         case htons(ETH_P_IP):
5149                 l4_hdr = ip_hdr(skb)->protocol;
5150                 break;
5151         case htons(ETH_P_IPV6):
5152                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5153                 break;
5154         default:
5155                 return features;
5156         }
5157
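        /* Keep HW offloads only for packets that look like VxLAN: UDP
         * transport, an inner Ethernet frame (ETH_P_TEB) and exactly a
         * UDP + VxLAN header between the outer transport header and the
         * inner MAC header.
         */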
5158         if (l4_hdr != IPPROTO_UDP ||
5159             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5160             skb->inner_protocol != htons(ETH_P_TEB) ||
5161             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5162             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5163                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5164
5165         return features;
5166 }
5167
5168 static int be_get_phys_port_id(struct net_device *dev,
5169                                struct netdev_phys_item_id *ppid)
5170 {
5171         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5172         struct be_adapter *adapter = netdev_priv(dev);
5173         u8 *id;
5174
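        /* The phys_port_id is the 1-based HBA port number followed by the
         * controller serial number words in reverse order.
         */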
5175         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5176                 return -ENOSPC;
5177
5178         ppid->id[0] = adapter->hba_port_num + 1;
5179         id = &ppid->id[1];
5180         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5181              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5182                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5183
5184         ppid->id_len = id_len;
5185
5186         return 0;
5187 }
5188
5189 static void be_set_rx_mode(struct net_device *dev)
5190 {
5191         struct be_adapter *adapter = netdev_priv(dev);
5192         struct be_cmd_work *work;
5193
5194         work = be_alloc_work(adapter, be_work_set_rx_mode);
5195         if (work)
5196                 queue_work(be_wq, &work->work);
5197 }
5198
5199 static const struct net_device_ops be_netdev_ops = {
5200         .ndo_open               = be_open,
5201         .ndo_stop               = be_close,
5202         .ndo_start_xmit         = be_xmit,
5203         .ndo_set_rx_mode        = be_set_rx_mode,
5204         .ndo_set_mac_address    = be_mac_addr_set,
5205         .ndo_change_mtu         = be_change_mtu,
5206         .ndo_get_stats64        = be_get_stats64,
5207         .ndo_validate_addr      = eth_validate_addr,
5208         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5209         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5210         .ndo_set_vf_mac         = be_set_vf_mac,
5211         .ndo_set_vf_vlan        = be_set_vf_vlan,
5212         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5213         .ndo_get_vf_config      = be_get_vf_config,
5214         .ndo_set_vf_link_state  = be_set_vf_link_state,
5215         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5216 #ifdef CONFIG_NET_POLL_CONTROLLER
5217         .ndo_poll_controller    = be_netpoll,
5218 #endif
5219         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5220         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5221 #ifdef CONFIG_NET_RX_BUSY_POLL
5222         .ndo_busy_poll          = be_busy_poll,
5223 #endif
5224         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5225         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5226         .ndo_features_check     = be_features_check,
5227         .ndo_get_phys_port_id   = be_get_phys_port_id,
5228 };
5229
5230 static void be_netdev_init(struct net_device *netdev)
5231 {
5232         struct be_adapter *adapter = netdev_priv(netdev);
5233
5234         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5235                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5236                 NETIF_F_HW_VLAN_CTAG_TX;
5237         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5238                 netdev->hw_features |= NETIF_F_RXHASH;
5239
5240         netdev->features |= netdev->hw_features |
5241                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5242
5243         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5244                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5245
5246         netdev->priv_flags |= IFF_UNICAST_FLT;
5247
5248         netdev->flags |= IFF_MULTICAST;
5249
5250         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5251
5252         netdev->netdev_ops = &be_netdev_ops;
5253
5254         netdev->ethtool_ops = &be_ethtool_ops;
5255 }
5256
5257 static void be_cleanup(struct be_adapter *adapter)
5258 {
5259         struct net_device *netdev = adapter->netdev;
5260
5261         rtnl_lock();
5262         netif_device_detach(netdev);
5263         if (netif_running(netdev))
5264                 be_close(netdev);
5265         rtnl_unlock();
5266
5267         be_clear(adapter);
5268 }
5269
5270 static int be_resume(struct be_adapter *adapter)
5271 {
5272         struct net_device *netdev = adapter->netdev;
5273         int status;
5274
5275         status = be_setup(adapter);
5276         if (status)
5277                 return status;
5278
5279         rtnl_lock();
5280         if (netif_running(netdev))
5281                 status = be_open(netdev);
5282         rtnl_unlock();
5283
5284         if (status)
5285                 return status;
5286
5287         netif_device_attach(netdev);
5288
5289         return 0;
5290 }
5291
5292 static void be_soft_reset(struct be_adapter *adapter)
5293 {
5294         u32 val;
5295
5296         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5297         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5298         val |= SLIPORT_SOFTRESET_SR_MASK;
5299         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5300 }
5301
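/* Recovery is attempted only when the POST stage reports a recoverable TPE
 * error, the driver has been loaded for at least ERR_RECOVERY_IDLE_TIME,
 * the last recovery was more than ERR_RECOVERY_INTERVAL ago, and the error
 * code differs from the previous one (no back-to-back identical TPE errors).
 */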
5302 static bool be_err_is_recoverable(struct be_adapter *adapter)
5303 {
5304         struct be_error_recovery *err_rec = &adapter->error_recovery;
5305         unsigned long initial_idle_time =
5306                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5307         unsigned long recovery_interval =
5308                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5309         u16 ue_err_code;
5310         u32 val;
5311
5312         val = be_POST_stage_get(adapter);
5313         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5314                 return false;
5315         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5316         if (ue_err_code == 0)
5317                 return false;
5318
5319         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5320                 ue_err_code);
5321
5322         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5323                 dev_err(&adapter->pdev->dev,
5324                         "Cannot recover within %lu sec from driver load\n",
5325                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5326                 return false;
5327         }
5328
5329         if (err_rec->last_recovery_time &&
5330             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5331                 dev_err(&adapter->pdev->dev,
5332                         "Cannot recover within %lu sec from last recovery\n",
5333                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5334                 return false;
5335         }
5336
5337         if (ue_err_code == err_rec->last_err_code) {
5338                 dev_err(&adapter->pdev->dev,
5339                         "Cannot recover from a consecutive TPE error\n");
5340                 return false;
5341         }
5342
5343         err_rec->last_recovery_time = jiffies;
5344         err_rec->last_err_code = ue_err_code;
5345         return true;
5346 }
5347
5348 static int be_tpe_recover(struct be_adapter *adapter)
5349 {
5350         struct be_error_recovery *err_rec = &adapter->error_recovery;
5351         int status = -EAGAIN;
5352         u32 val;
5353
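        /* TPE recovery state machine for BEx/SH:
         * NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT.
         * Each state sets resched_delay so the error-detection task re-runs
         * this function after the required wait; a return value of 0 means
         * the TPE phase is done and recovery can proceed to re-init.
         */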
5354         switch (err_rec->recovery_state) {
5355         case ERR_RECOVERY_ST_NONE:
5356                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5357                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5358                 break;
5359
5360         case ERR_RECOVERY_ST_DETECT:
5361                 val = be_POST_stage_get(adapter);
5362                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5363                     POST_STAGE_RECOVERABLE_ERR) {
5364                         dev_err(&adapter->pdev->dev,
5365                                 "Unrecoverable HW error detected: 0x%x\n", val);
5366                         status = -EINVAL;
5367                         err_rec->resched_delay = 0;
5368                         break;
5369                 }
5370
5371                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5372
5373                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5374                  * milliseconds before it checks for final error status in
5375                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5376                  * If they are, PF0 initiates a Soft Reset.
5377                  */
5378                 if (adapter->pf_num == 0) {
5379                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5380                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5381                                         ERR_RECOVERY_UE_DETECT_DURATION;
5382                         break;
5383                 }
5384
5385                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5386                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5387                                         ERR_RECOVERY_UE_DETECT_DURATION;
5388                 break;
5389
5390         case ERR_RECOVERY_ST_RESET:
5391                 if (!be_err_is_recoverable(adapter)) {
5392                         dev_err(&adapter->pdev->dev,
5393                                 "Failed to meet recovery criteria\n");
5394                         status = -EIO;
5395                         err_rec->resched_delay = 0;
5396                         break;
5397                 }
5398                 be_soft_reset(adapter);
5399                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5400                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5401                                         err_rec->ue_to_reset_time;
5402                 break;
5403
5404         case ERR_RECOVERY_ST_PRE_POLL:
5405                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5406                 err_rec->resched_delay = 0;
5407                 status = 0;                     /* done */
5408                 break;
5409
5410         default:
5411                 status = -EINVAL;
5412                 err_rec->resched_delay = 0;
5413                 break;
5414         }
5415
5416         return status;
5417 }
5418
5419 static int be_err_recover(struct be_adapter *adapter)
5420 {
5421         int status;
5422
5423         if (!lancer_chip(adapter)) {
5424                 if (!adapter->error_recovery.recovery_supported ||
5425                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5426                         return -EIO;
5427                 status = be_tpe_recover(adapter);
5428                 if (status)
5429                         goto err;
5430         }
5431
5432         /* Wait for adapter to reach quiescent state before
5433          * destroying queues
5434          */
5435         status = be_fw_wait_ready(adapter);
5436         if (status)
5437                 goto err;
5438
5439         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5440
5441         be_cleanup(adapter);
5442
5443         status = be_resume(adapter);
5444         if (status)
5445                 goto err;
5446
5447         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5448
5449 err:
5450         return status;
5451 }
5452
5453 static void be_err_detection_task(struct work_struct *work)
5454 {
5455         struct be_error_recovery *err_rec =
5456                         container_of(work, struct be_error_recovery,
5457                                      err_detection_work.work);
5458         struct be_adapter *adapter =
5459                         container_of(err_rec, struct be_adapter,
5460                                      error_recovery);
5461         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5462         struct device *dev = &adapter->pdev->dev;
5463         int recovery_status;
5464
5465         be_detect_error(adapter);
5466         if (!be_check_error(adapter, BE_ERROR_HW))
5467                 goto reschedule_task;
5468
5469         recovery_status = be_err_recover(adapter);
5470         if (!recovery_status) {
5471                 err_rec->recovery_retries = 0;
5472                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5473                 dev_info(dev, "Adapter recovery successful\n");
5474                 goto reschedule_task;
5475         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5476                 /* BEx/SH recovery state machine */
5477                 if (adapter->pf_num == 0 &&
5478                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5479                         dev_err(&adapter->pdev->dev,
5480                                 "Adapter recovery in progress\n");
5481                 resched_delay = err_rec->resched_delay;
5482                 goto reschedule_task;
5483         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5484                 /* For VFs, check every second whether the PF has
5485                  * allocated resources.
5486                  */
5487                 dev_err(dev, "Re-trying adapter recovery\n");
5488                 goto reschedule_task;
5489         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5490                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5491                 /* In case of another error during recovery, it takes 30 sec
5492                  * for the adapter to come out of error. Retry error recovery after
5493                  * this time interval.
5494                  */
5495                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5496                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5497                 goto reschedule_task;
5498         } else {
5499                 dev_err(dev, "Adapter recovery failed\n");
5500                 dev_err(dev, "Please reboot server to recover\n");
5501         }
5502
5503         return;
5504
5505 reschedule_task:
5506         be_schedule_err_detection(adapter, resched_delay);
5507 }
5508
5509 static void be_log_sfp_info(struct be_adapter *adapter)
5510 {
5511         int status;
5512
5513         status = be_cmd_query_sfp_info(adapter);
5514         if (!status) {
5515                 dev_err(&adapter->pdev->dev,
5516                         "Port %c: %s Vendor: %s part no: %s\n",
5517                         adapter->port_name,
5518                         be_misconfig_evt_port_state[adapter->phy_state],
5519                         adapter->phy.vendor_name,
5520                         adapter->phy.vendor_pn);
5521         }
5522         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5523 }
5524
5525 static void be_worker(struct work_struct *work)
5526 {
5527         struct be_adapter *adapter =
5528                 container_of(work, struct be_adapter, work.work);
5529         struct be_rx_obj *rxo;
5530         int i;
5531
5532         if (be_physfn(adapter) &&
5533             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5534                 be_cmd_get_die_temperature(adapter);
5535
5536         /* when interrupts are not yet enabled, just reap any pending
5537          * mcc completions
5538          */
5539         if (!netif_running(adapter->netdev)) {
5540                 local_bh_disable();
5541                 be_process_mcc(adapter);
5542                 local_bh_enable();
5543                 goto reschedule;
5544         }
5545
5546         if (!adapter->stats_cmd_sent) {
5547                 if (lancer_chip(adapter))
5548                         lancer_cmd_get_pport_stats(adapter,
5549                                                    &adapter->stats_cmd);
5550                 else
5551                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5552         }
5553
5554         for_all_rx_queues(adapter, rxo, i) {
5555                 /* Replenish RX-queues starved due to memory
5556                  * allocation failures.
5557                  */
5558                 if (rxo->rx_post_starved)
5559                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5560         }
5561
5562         /* EQ-delay update for Skyhawk is done while notifying EQ */
5563         if (!skyhawk_chip(adapter))
5564                 be_eqd_update(adapter, false);
5565
5566         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5567                 be_log_sfp_info(adapter);
5568
5569 reschedule:
5570         adapter->work_counter++;
5571         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5572 }
5573
5574 static void be_unmap_pci_bars(struct be_adapter *adapter)
5575 {
5576         if (adapter->csr)
5577                 pci_iounmap(adapter->pdev, adapter->csr);
5578         if (adapter->db)
5579                 pci_iounmap(adapter->pdev, adapter->db);
5580         if (adapter->pcicfg && adapter->pcicfg_mapped)
5581                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5582 }
5583
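/* The doorbell BAR is BAR 0 on Lancer and on VFs; BAR 4 otherwise */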
5584 static int db_bar(struct be_adapter *adapter)
5585 {
5586         if (lancer_chip(adapter) || be_virtfn(adapter))
5587                 return 0;
5588         else
5589                 return 4;
5590 }
5591
5592 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5593 {
5594         if (skyhawk_chip(adapter)) {
5595                 adapter->roce_db.size = 4096;
5596                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5597                                                               db_bar(adapter));
5598                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5599                                                                db_bar(adapter));
5600         }
5601         return 0;
5602 }
5603
5604 static int be_map_pci_bars(struct be_adapter *adapter)
5605 {
5606         struct pci_dev *pdev = adapter->pdev;
5607         u8 __iomem *addr;
5608         u32 sli_intf;
5609
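        /* Decode the SLI interface register to learn the ASIC family and
         * whether this function is a VF; this determines which BARs need
         * to be mapped.
         */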
5610         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5611         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5612                                 SLI_INTF_FAMILY_SHIFT;
5613         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5614
5615         if (BEx_chip(adapter) && be_physfn(adapter)) {
5616                 adapter->csr = pci_iomap(pdev, 2, 0);
5617                 if (!adapter->csr)
5618                         return -ENOMEM;
5619         }
5620
5621         addr = pci_iomap(pdev, db_bar(adapter), 0);
5622         if (!addr)
5623                 goto pci_map_err;
5624         adapter->db = addr;
5625
5626         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5627                 if (be_physfn(adapter)) {
5628                         /* PCICFG is the 2nd BAR in BE2 */
5629                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5630                         if (!addr)
5631                                 goto pci_map_err;
5632                         adapter->pcicfg = addr;
5633                         adapter->pcicfg_mapped = true;
5634                 } else {
5635                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5636                         adapter->pcicfg_mapped = false;
5637                 }
5638         }
5639
5640         be_roce_map_pci_bars(adapter);
5641         return 0;
5642
5643 pci_map_err:
5644         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5645         be_unmap_pci_bars(adapter);
5646         return -ENOMEM;
5647 }
5648
5649 static void be_drv_cleanup(struct be_adapter *adapter)
5650 {
5651         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5652         struct device *dev = &adapter->pdev->dev;
5653
5654         if (mem->va)
5655                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5656
5657         mem = &adapter->rx_filter;
5658         if (mem->va)
5659                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660
5661         mem = &adapter->stats_cmd;
5662         if (mem->va)
5663                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664 }
5665
5666 /* Allocate and initialize various fields in be_adapter struct */
5667 static int be_drv_init(struct be_adapter *adapter)
5668 {
5669         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5670         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5671         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5672         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5673         struct device *dev = &adapter->pdev->dev;
5674         int status = 0;
5675
5676         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5677         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5678                                                  &mbox_mem_alloc->dma,
5679                                                  GFP_KERNEL);
5680         if (!mbox_mem_alloc->va)
5681                 return -ENOMEM;
5682
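        /* The MCC mailbox must be 16-byte aligned: over-allocate by 16 bytes
         * and align both the virtual and DMA addresses within that allocation.
         */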
5683         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5684         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5685         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5686
5687         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5688         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5689                                             &rx_filter->dma, GFP_KERNEL);
5690         if (!rx_filter->va) {
5691                 status = -ENOMEM;
5692                 goto free_mbox;
5693         }
5694
5695         if (lancer_chip(adapter))
5696                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5697         else if (BE2_chip(adapter))
5698                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5699         else if (BE3_chip(adapter))
5700                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5701         else
5702                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5703         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5704                                             &stats_cmd->dma, GFP_KERNEL);
5705         if (!stats_cmd->va) {
5706                 status = -ENOMEM;
5707                 goto free_rx_filter;
5708         }
5709
5710         mutex_init(&adapter->mbox_lock);
5711         mutex_init(&adapter->mcc_lock);
5712         mutex_init(&adapter->rx_filter_lock);
5713         spin_lock_init(&adapter->mcc_cq_lock);
5714         init_completion(&adapter->et_cmd_compl);
5715
5716         pci_save_state(adapter->pdev);
5717
5718         INIT_DELAYED_WORK(&adapter->work, be_worker);
5719
5720         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5721         adapter->error_recovery.resched_delay = 0;
5722         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5723                           be_err_detection_task);
5724
5725         adapter->rx_fc = true;
5726         adapter->tx_fc = true;
5727
5728         /* Must be a power of 2 or else MODULO will BUG_ON */
5729         adapter->be_get_temp_freq = 64;
5730
5731         return 0;
5732
5733 free_rx_filter:
5734         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5735 free_mbox:
5736         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5737                           mbox_mem_alloc->dma);
5738         return status;
5739 }
5740
5741 static void be_remove(struct pci_dev *pdev)
5742 {
5743         struct be_adapter *adapter = pci_get_drvdata(pdev);
5744
5745         if (!adapter)
5746                 return;
5747
5748         be_roce_dev_remove(adapter);
5749         be_intr_set(adapter, false);
5750
5751         be_cancel_err_detection(adapter);
5752
5753         unregister_netdev(adapter->netdev);
5754
5755         be_clear(adapter);
5756
5757         if (!pci_vfs_assigned(adapter->pdev))
5758                 be_cmd_reset_function(adapter);
5759
5760         /* Tell the FW we are done issuing cmds */
5761         be_cmd_fw_clean(adapter);
5762
5763         be_unmap_pci_bars(adapter);
5764         be_drv_cleanup(adapter);
5765
5766         pci_disable_pcie_error_reporting(pdev);
5767
5768         pci_release_regions(pdev);
5769         pci_disable_device(pdev);
5770
5771         free_netdev(adapter->netdev);
5772 }
5773
5774 static ssize_t be_hwmon_show_temp(struct device *dev,
5775                                   struct device_attribute *dev_attr,
5776                                   char *buf)
5777 {
5778         struct be_adapter *adapter = dev_get_drvdata(dev);
5779
5780         /* Unit: millidegree Celsius */
5781         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5782                 return -EIO;
5783         else
5784                 return sprintf(buf, "%u\n",
5785                                adapter->hwmon_info.be_on_die_temp * 1000);
5786 }
5787
5788 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5789                           be_hwmon_show_temp, NULL, 1);
5790
5791 static struct attribute *be_hwmon_attrs[] = {
5792         &sensor_dev_attr_temp1_input.dev_attr.attr,
5793         NULL
5794 };
5795
5796 ATTRIBUTE_GROUPS(be_hwmon);
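/* Illustrative usage (editor's note; the hwmon index and the value shown are
 * examples only): the sensor registered via these attribute groups is read
 * through the standard hwmon sysfs interface in millidegrees Celsius, e.g.
 *
 *   $ cat /sys/class/hwmon/hwmonX/temp1_input
 *   55000        (i.e. 55 degrees Celsius)
 *
 * -EIO is returned while the reading is still BE_INVALID_DIE_TEMP.
 */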
5797
5798 static char *mc_name(struct be_adapter *adapter)
5799 {
5800         char *str = ""; /* default */
5801
5802         switch (adapter->mc_type) {
5803         case UMC:
5804                 str = "UMC";
5805                 break;
5806         case FLEX10:
5807                 str = "FLEX10";
5808                 break;
5809         case vNIC1:
5810                 str = "vNIC-1";
5811                 break;
5812         case nPAR:
5813                 str = "nPAR";
5814                 break;
5815         case UFP:
5816                 str = "UFP";
5817                 break;
5818         case vNIC2:
5819                 str = "vNIC-2";
5820                 break;
5821         default:
5822                 str = "";
5823         }
5824
5825         return str;
5826 }
5827
5828 static inline char *func_name(struct be_adapter *adapter)
5829 {
5830         return be_physfn(adapter) ? "PF" : "VF";
5831 }
5832
5833 static inline char *nic_name(struct pci_dev *pdev)
5834 {
5835         switch (pdev->device) {
5836         case OC_DEVICE_ID1:
5837                 return OC_NAME;
5838         case OC_DEVICE_ID2:
5839                 return OC_NAME_BE;
5840         case OC_DEVICE_ID3:
5841         case OC_DEVICE_ID4:
5842                 return OC_NAME_LANCER;
5843         case BE_DEVICE_ID2:
5844                 return BE3_NAME;
5845         case OC_DEVICE_ID5:
5846         case OC_DEVICE_ID6:
5847                 return OC_NAME_SH;
5848         default:
5849                 return BE_NAME;
5850         }
5851 }
5852
5853 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5854 {
5855         struct be_adapter *adapter;
5856         struct net_device *netdev;
5857         int status = 0;
5858
5859         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5860
5861         status = pci_enable_device(pdev);
5862         if (status)
5863                 goto do_none;
5864
5865         status = pci_request_regions(pdev, DRV_NAME);
5866         if (status)
5867                 goto disable_dev;
5868         pci_set_master(pdev);
5869
5870         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5871         if (!netdev) {
5872                 status = -ENOMEM;
5873                 goto rel_reg;
5874         }
5875         adapter = netdev_priv(netdev);
5876         adapter->pdev = pdev;
5877         pci_set_drvdata(pdev, adapter);
5878         adapter->netdev = netdev;
5879         SET_NETDEV_DEV(netdev, &pdev->dev);
5880
5881         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5882         if (!status) {
5883                 netdev->features |= NETIF_F_HIGHDMA;
5884         } else {
5885                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5886                 if (status) {
5887                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5888                         goto free_netdev;
5889                 }
5890         }
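        /* Descriptive note (added for clarity): a 64-bit DMA mask is tried
         * first and NETIF_F_HIGHDMA is advertised only when it is accepted;
         * otherwise the driver falls back to a 32-bit mask and aborts the
         * probe if even that fails.
         */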
5891
5892         status = pci_enable_pcie_error_reporting(pdev);
5893         if (!status)
5894                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5895
5896         status = be_map_pci_bars(adapter);
5897         if (status)
5898                 goto free_netdev;
5899
5900         status = be_drv_init(adapter);
5901         if (status)
5902                 goto unmap_bars;
5903
5904         status = be_setup(adapter);
5905         if (status)
5906                 goto drv_cleanup;
5907
5908         be_netdev_init(netdev);
5909         status = register_netdev(netdev);
5910         if (status != 0)
5911                 goto unsetup;
5912
5913         be_roce_dev_add(adapter);
5914
5915         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5916         adapter->error_recovery.probe_time = jiffies;
5917
5918         /* On-die temperature is not supported for VFs. */
5919         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5920                 adapter->hwmon_info.hwmon_dev =
5921                         devm_hwmon_device_register_with_groups(&pdev->dev,
5922                                                                DRV_NAME,
5923                                                                adapter,
5924                                                                be_hwmon_groups);
5925                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5926         }
5927
5928         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5929                  func_name(adapter), mc_name(adapter), adapter->port_name);
5930
5931         return 0;
5932
5933 unsetup:
5934         be_clear(adapter);
5935 drv_cleanup:
5936         be_drv_cleanup(adapter);
5937 unmap_bars:
5938         be_unmap_pci_bars(adapter);
5939 free_netdev:
5940         free_netdev(netdev);
5941 rel_reg:
5942         pci_release_regions(pdev);
5943 disable_dev:
5944         pci_disable_device(pdev);
5945 do_none:
5946         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5947         return status;
5948 }
5949
5950 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5951 {
5952         struct be_adapter *adapter = pci_get_drvdata(pdev);
5953
5954         be_intr_set(adapter, false);
5955         be_cancel_err_detection(adapter);
5956
5957         be_cleanup(adapter);
5958
5959         pci_save_state(pdev);
5960         pci_disable_device(pdev);
5961         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5962         return 0;
5963 }
5964
5965 static int be_pci_resume(struct pci_dev *pdev)
5966 {
5967         struct be_adapter *adapter = pci_get_drvdata(pdev);
5968         int status = 0;
5969
5970         status = pci_enable_device(pdev);
5971         if (status)
5972                 return status;
5973
5974         pci_restore_state(pdev);
5975
5976         status = be_resume(adapter);
5977         if (status)
5978                 return status;
5979
5980         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5981
5982         return 0;
5983 }
5984
5985 /*
5986  * An FLR will stop BE from DMAing any data.
5987  */
5988 static void be_shutdown(struct pci_dev *pdev)
5989 {
5990         struct be_adapter *adapter = pci_get_drvdata(pdev);
5991
5992         if (!adapter)
5993                 return;
5994
5995         be_roce_dev_shutdown(adapter);
5996         cancel_delayed_work_sync(&adapter->work);
5997         be_cancel_err_detection(adapter);
5998
5999         netif_device_detach(adapter->netdev);
6000
6001         be_cmd_reset_function(adapter);
6002
6003         pci_disable_device(pdev);
6004 }
6005
6006 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6007                                             pci_channel_state_t state)
6008 {
6009         struct be_adapter *adapter = pci_get_drvdata(pdev);
6010
6011         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6012
6013         be_roce_dev_remove(adapter);
6014
6015         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6016                 be_set_error(adapter, BE_ERROR_EEH);
6017
6018                 be_cancel_err_detection(adapter);
6019
6020                 be_cleanup(adapter);
6021         }
6022
6023         if (state == pci_channel_io_perm_failure)
6024                 return PCI_ERS_RESULT_DISCONNECT;
6025
6026         pci_disable_device(pdev);
6027
6028         /* The error could cause the FW to trigger a flash debug dump.
6029          * Resetting the card while the flash dump is in progress
6030          * can cause it not to recover; wait for the dump to finish.
6031          * Wait only on the first function, as the wait is needed only
6032          * once per adapter.
6033          */
6034         if (pdev->devfn == 0)
6035                 ssleep(30);
6036
6037         return PCI_ERS_RESULT_NEED_RESET;
6038 }
6039
6040 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6041 {
6042         struct be_adapter *adapter = pci_get_drvdata(pdev);
6043         int status;
6044
6045         dev_info(&adapter->pdev->dev, "EEH reset\n");
6046
6047         status = pci_enable_device(pdev);
6048         if (status)
6049                 return PCI_ERS_RESULT_DISCONNECT;
6050
6051         pci_set_master(pdev);
6052         pci_restore_state(pdev);
6053
6054         /* Check if card is ok and fw is ready */
6055         dev_info(&adapter->pdev->dev,
6056                  "Waiting for FW to be ready after EEH reset\n");
6057         status = be_fw_wait_ready(adapter);
6058         if (status)
6059                 return PCI_ERS_RESULT_DISCONNECT;
6060
6061         pci_cleanup_aer_uncorrect_error_status(pdev);
6062         be_clear_error(adapter, BE_CLEAR_ALL);
6063         return PCI_ERS_RESULT_RECOVERED;
6064 }
6065
6066 static void be_eeh_resume(struct pci_dev *pdev)
6067 {
6068         int status = 0;
6069         struct be_adapter *adapter = pci_get_drvdata(pdev);
6070
6071         dev_info(&adapter->pdev->dev, "EEH resume\n");
6072
6073         pci_save_state(pdev);
6074
6075         status = be_resume(adapter);
6076         if (status)
6077                 goto err;
6078
6079         be_roce_dev_add(adapter);
6080
6081         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6082         return;
6083 err:
6084         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6085 }
6086
6087 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6088 {
6089         struct be_adapter *adapter = pci_get_drvdata(pdev);
6090         struct be_resources vft_res = {0};
6091         int status;
6092
6093         if (!num_vfs)
6094                 be_vf_clear(adapter);
6095
6096         adapter->num_vfs = num_vfs;
6097
6098         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6099                 dev_warn(&pdev->dev,
6100                          "Cannot disable VFs while they are assigned\n");
6101                 return -EBUSY;
6102         }
6103
6104         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6105          * resources are equally distributed across the maximum number of VFs.
6106          * The user may request that only a subset of the max VFs be enabled.
6107          * Based on num_vfs, redistribute the resources across the requested
6108          * VFs so that each enabled VF gets a larger share of the resources.
6109          * This facility is not available in BE3 FW.
6110          * Also, on Lancer chips this is done by the FW.
6111          */
6112         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6113                 be_calculate_vf_res(adapter, adapter->num_vfs,
6114                                     &vft_res);
6115                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6116                                                  adapter->num_vfs, &vft_res);
6117                 if (status)
6118                         dev_err(&pdev->dev,
6119                                 "Failed to optimize SR-IOV resources\n");
6120         }
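        /* Worked example (editor's illustration; the numbers are assumptions,
         * not values read from the adapter): if the PF pool holds 64 RX rings
         * and the SR-IOV config spreads them across a maximum of 32 VFs, each
         * VF is limited to 2 rings by default. Enabling only num_vfs = 8 and
         * re-carving the pool with be_cmd_set_sriov_config() lets each of
         * those 8 VFs use 8 rings instead.
         */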
6121
6122         status = be_get_resources(adapter);
6123         if (status)
6124                 return be_cmd_status(status);
6125
6126         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6127         rtnl_lock();
6128         status = be_update_queues(adapter);
6129         rtnl_unlock();
6130         if (status)
6131                 return be_cmd_status(status);
6132
6133         if (adapter->num_vfs)
6134                 status = be_vf_setup(adapter);
6135
6136         if (!status)
6137                 return adapter->num_vfs;
6138
6139         return 0;
6140 }
6141
6142 static const struct pci_error_handlers be_eeh_handlers = {
6143         .error_detected = be_eeh_err_detected,
6144         .slot_reset = be_eeh_reset,
6145         .resume = be_eeh_resume,
6146 };
6147
6148 static struct pci_driver be_driver = {
6149         .name = DRV_NAME,
6150         .id_table = be_dev_ids,
6151         .probe = be_probe,
6152         .remove = be_remove,
6153         .suspend = be_suspend,
6154         .resume = be_pci_resume,
6155         .shutdown = be_shutdown,
6156         .sriov_configure = be_pci_sriov_configure,
6157         .err_handler = &be_eeh_handlers
6158 };
6159
6160 static int __init be_init_module(void)
6161 {
6162         int status;
6163
6164         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6165             rx_frag_size != 2048) {
6166                 printk(KERN_WARNING DRV_NAME
6167                         " : Module param rx_frag_size must be 2048/4096/8192."
6168                         " Using 2048\n");
6169                 rx_frag_size = 2048;
6170         }
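        /* Illustrative usage (editor's note; assumes DRV_NAME is "be2net" and
         * the value shown is only an example):
         *
         *   # modprobe be2net rx_frag_size=4096
         *
         * Any value other than 2048, 4096 or 8192 is overridden to 2048 here.
         */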
6171
6172         if (num_vfs > 0) {
6173                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6174                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6175         }
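        /* Illustrative usage (editor's note; the PCI address is a
         * placeholder): VFs are enabled and disabled through the standard
         * sriov_numvfs sysfs attribute, which ends up in
         * be_pci_sriov_configure(), e.g.
         *
         *   # echo 4 > /sys/bus/pci/devices/0000:05:00.0/sriov_numvfs
         *   # echo 0 > /sys/bus/pci/devices/0000:05:00.0/sriov_numvfs
         */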
6176
6177         be_wq = create_singlethread_workqueue("be_wq");
6178         if (!be_wq) {
6179                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6180                 return -ENOMEM;
6181         }
6182
6183         be_err_recovery_workq =
6184                 create_singlethread_workqueue("be_err_recover");
6185         if (!be_err_recovery_workq)
6186                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6187
6188         status = pci_register_driver(&be_driver);
6189         if (status) {
6190                 destroy_workqueue(be_wq);
6191                 be_destroy_err_recovery_workq();
6192         }
6193         return status;
6194 }
6195 module_init(be_init_module);
6196
6197 static void __exit be_exit_module(void)
6198 {
6199         pci_unregister_driver(&be_driver);
6200
6201         be_destroy_err_recovery_workq();
6202
6203         if (be_wq)
6204                 destroy_workqueue(be_wq);
6205 }
6206 module_exit(be_exit_module);