benet: Replace ndo_add/del_vxlan_port with ndo_add/del_udp_enc_port
drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2015 Emulex
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
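/* Free the DMA-coherent ring memory backing a queue, if it was allocated */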
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
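/* Enable/disable host interrupt delivery by toggling the HOSTINTR bit in the
 * membar control register, accessed via PCI config space.
 */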
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On Lancer, interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
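/* Ring the RX queue doorbell to post 'posted' new receive buffers to ring 'qid' */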
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
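/* Ring the EQ doorbell: acknowledge 'num_popped' events and optionally re-arm
 * the EQ, clear the interrupt and set a new delay multiplier.
 */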
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if the user-provided MAC is different
276          * from the active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK only if the PF programmed
289          * the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide if the new MAC is successfully activated only after
305          * querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or because the PF didn't pre-provision the MAC.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
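/* Copy the v0 (BE2) port/rxf/pmem stats from the FW response into drv_stats */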
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
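/* Accumulate a 16-bit HW counter that may wrap around into a 32-bit SW counter */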
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* the erx HW counter below can actually wrap around after
569                  * 65535; the driver accumulates it into a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
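/* ndo_get_stats64 handler: aggregate per-queue pkt/byte counters and error stats */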
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is not per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
716  * wrb to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
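/* Return the skb's VLAN tag; if the OS-provided priority is not in the
 * available priority bmap, remap it to the recommended priority bits.
 */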
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
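/* Translate the skb's offload requests (LSO, csum, VLAN) into WRB feature flags */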
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
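/* Insert the VLAN tag (and the outer QnQ VLAN, if configured) inline in the
 * packet and flag the WRB to skip HW VLAN insertion where needed.
 */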
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1000                  * skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
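/* Detect IPv6 pkts carrying an extension header (2nd byte following the IPv6
 * hdr is 0xff); such pkts need the TX stall workarounds below.
 */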
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies tot_len field in IP header
1068          * incorrectly when VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though CSUM offload is disabled.
1089          * Manually insert VLAN in pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lockup when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent:
1108          * ASIC lockup when the ASIC inserts VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in driver,
1110          * and set event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
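/* Apply TX workarounds common to all chips: pad pkts of 32 bytes or less,
 * invoke the BEx/Lancer specific workarounds and trim skbs that exceed the
 * max size the HW can handle.
 */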
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32 bytes or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36 bytes.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
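/* Decide whether the pkt must also be sent to the BMC, based on the pkt type
 * (ARP, DHCP, NetBIOS, IPv6 RA/NA etc.) and the BMC filtering mask.
 */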
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For packets over a VLAN that are destined to the BMC, the ASIC
1299          * expects the VLAN tag to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1329          * enqueue the pkt a 2nd time with the mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to further configure vids if in promiscuous mode */
1424         if (be_in_all_promisc(adapter))
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         /* Construct VLAN Table to give to HW */
1431         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1432                 vids[num++] = cpu_to_le16(i);
1433
1434         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1435         if (status) {
1436                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1437                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1438                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1439                     addl_status(status) ==
1440                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1441                         return be_set_vlan_promisc(adapter);
1442         } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1443                 status = be_clear_vlan_promisc(adapter);
1444         }
1445         return status;
1446 }
1447
1448 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1449 {
1450         struct be_adapter *adapter = netdev_priv(netdev);
1451         int status = 0;
1452
1453         /* Packets with VID 0 are always received by Lancer by default */
1454         if (lancer_chip(adapter) && vid == 0)
1455                 return status;
1456
1457         if (test_bit(vid, adapter->vids))
1458                 return status;
1459
1460         set_bit(vid, adapter->vids);
1461         adapter->vlans_added++;
1462
1463         status = be_vid_config(adapter);
1464         if (status) {
1465                 adapter->vlans_added--;
1466                 clear_bit(vid, adapter->vids);
1467         }
1468
1469         return status;
1470 }
1471
1472 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1473 {
1474         struct be_adapter *adapter = netdev_priv(netdev);
1475
1476         /* Packets with VID 0 are always received by Lancer by default */
1477         if (lancer_chip(adapter) && vid == 0)
1478                 return 0;
1479
1480         if (!test_bit(vid, adapter->vids))
1481                 return 0;
1482
1483         clear_bit(vid, adapter->vids);
1484         adapter->vlans_added--;
1485
1486         return be_vid_config(adapter);
1487 }
1488
1489 static void be_clear_all_promisc(struct be_adapter *adapter)
1490 {
1491         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1492         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1493 }
1494
1495 static void be_set_all_promisc(struct be_adapter *adapter)
1496 {
1497         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1498         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1499 }
1500
1501 static void be_set_mc_promisc(struct be_adapter *adapter)
1502 {
1503         int status;
1504
1505         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1506                 return;
1507
1508         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1509         if (!status)
1510                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1511 }
1512
1513 static void be_set_mc_list(struct be_adapter *adapter)
1514 {
1515         int status;
1516
1517         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1518         if (!status)
1519                 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1520         else
1521                 be_set_mc_promisc(adapter);
1522 }
1523
1524 static void be_set_uc_list(struct be_adapter *adapter)
1525 {
1526         struct netdev_hw_addr *ha;
1527         int i = 1; /* First slot is claimed by the Primary MAC */
1528
1529         for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1530                 be_cmd_pmac_del(adapter, adapter->if_handle,
1531                                 adapter->pmac_id[i], 0);
1532
1533         if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1534                 be_set_all_promisc(adapter);
1535                 return;
1536         }
1537
1538         netdev_for_each_uc_addr(ha, adapter->netdev) {
1539                 adapter->uc_macs++; /* First slot is for Primary MAC */
1540                 be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1541                                 &adapter->pmac_id[adapter->uc_macs], 0);
1542         }
1543 }
1544
1545 static void be_clear_uc_list(struct be_adapter *adapter)
1546 {
1547         int i;
1548
1549         for (i = 1; i < (adapter->uc_macs + 1); i++)
1550                 be_cmd_pmac_del(adapter, adapter->if_handle,
1551                                 adapter->pmac_id[i], 0);
1552         adapter->uc_macs = 0;
1553 }
1554
1555 static void be_set_rx_mode(struct net_device *netdev)
1556 {
1557         struct be_adapter *adapter = netdev_priv(netdev);
1558
1559         if (netdev->flags & IFF_PROMISC) {
1560                 be_set_all_promisc(adapter);
1561                 return;
1562         }
1563
1564         /* Interface was previously in promiscuous mode; disable it */
1565         if (be_in_all_promisc(adapter)) {
1566                 be_clear_all_promisc(adapter);
1567                 if (adapter->vlans_added)
1568                         be_vid_config(adapter);
1569         }
1570
1571         /* Enable multicast promisc if num configured exceeds what we support */
1572         if (netdev->flags & IFF_ALLMULTI ||
1573             netdev_mc_count(netdev) > be_max_mc(adapter)) {
1574                 be_set_mc_promisc(adapter);
1575                 return;
1576         }
1577
1578         if (netdev_uc_count(netdev) != adapter->uc_macs)
1579                 be_set_uc_list(adapter);
1580
1581         be_set_mc_list(adapter);
1582 }
1583
1584 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1585 {
1586         struct be_adapter *adapter = netdev_priv(netdev);
1587         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1588         int status;
1589
1590         if (!sriov_enabled(adapter))
1591                 return -EPERM;
1592
1593         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1594                 return -EINVAL;
1595
1596         /* Proceed further only if the user-provided MAC is different
1597          * from the active MAC
1598          */
1599         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1600                 return 0;
1601
1602         if (BEx_chip(adapter)) {
1603                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1604                                 vf + 1);
1605
1606                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1607                                          &vf_cfg->pmac_id, vf + 1);
1608         } else {
1609                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1610                                         vf + 1);
1611         }
1612
1613         if (status) {
1614                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1615                         mac, vf, status);
1616                 return be_cmd_status(status);
1617         }
1618
1619         ether_addr_copy(vf_cfg->mac_addr, mac);
1620
1621         return 0;
1622 }
1623
1624 static int be_get_vf_config(struct net_device *netdev, int vf,
1625                             struct ifla_vf_info *vi)
1626 {
1627         struct be_adapter *adapter = netdev_priv(netdev);
1628         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1629
1630         if (!sriov_enabled(adapter))
1631                 return -EPERM;
1632
1633         if (vf >= adapter->num_vfs)
1634                 return -EINVAL;
1635
1636         vi->vf = vf;
1637         vi->max_tx_rate = vf_cfg->tx_rate;
1638         vi->min_tx_rate = 0;
1639         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1640         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1641         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1642         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1643         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1644
1645         return 0;
1646 }
1647
1648 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1649 {
1650         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1651         u16 vids[BE_NUM_VLANS_SUPPORTED];
1652         int vf_if_id = vf_cfg->if_handle;
1653         int status;
1654
1655         /* Enable Transparent VLAN Tagging */
1656         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1657         if (status)
1658                 return status;
1659
1660         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1661         vids[0] = 0;
1662         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1663         if (!status)
1664                 dev_info(&adapter->pdev->dev,
1665                          "Cleared guest VLANs on VF%d", vf);
1666
1667         /* After TVT is enabled, disallow VFs to program VLAN filters */
1668         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1669                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1670                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1671                 if (!status)
1672                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1673         }
1674         return 0;
1675 }
1676
1677 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1678 {
1679         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1680         struct device *dev = &adapter->pdev->dev;
1681         int status;
1682
1683         /* Reset Transparent VLAN Tagging. */
1684         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1685                                        vf_cfg->if_handle, 0, 0);
1686         if (status)
1687                 return status;
1688
1689         /* Allow VFs to program VLAN filtering */
1690         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1691                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1692                                                   BE_PRIV_FILTMGMT, vf + 1);
1693                 if (!status) {
1694                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1695                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1696                 }
1697         }
1698
1699         dev_info(dev,
1700                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1701         return 0;
1702 }
1703
1704 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1705 {
1706         struct be_adapter *adapter = netdev_priv(netdev);
1707         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1708         int status;
1709
1710         if (!sriov_enabled(adapter))
1711                 return -EPERM;
1712
1713         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1714                 return -EINVAL;
1715
1716         if (vlan || qos) {
1717                 vlan |= qos << VLAN_PRIO_SHIFT;
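                     /* e.g. vlan=100 (0x064) with qos=5 gives a tag of
                      * (5 << 13) | 100 = 0xa064, passed on to be_set_vf_tvt()
                      */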
1718                 status = be_set_vf_tvt(adapter, vf, vlan);
1719         } else {
1720                 status = be_clear_vf_tvt(adapter, vf);
1721         }
1722
1723         if (status) {
1724                 dev_err(&adapter->pdev->dev,
1725                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1726                         status);
1727                 return be_cmd_status(status);
1728         }
1729
1730         vf_cfg->vlan_tag = vlan;
1731         return 0;
1732 }
1733
1734 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1735                              int min_tx_rate, int max_tx_rate)
1736 {
1737         struct be_adapter *adapter = netdev_priv(netdev);
1738         struct device *dev = &adapter->pdev->dev;
1739         int percent_rate, status = 0;
1740         u16 link_speed = 0;
1741         u8 link_status;
1742
1743         if (!sriov_enabled(adapter))
1744                 return -EPERM;
1745
1746         if (vf >= adapter->num_vfs)
1747                 return -EINVAL;
1748
1749         if (min_tx_rate)
1750                 return -EINVAL;
1751
1752         if (!max_tx_rate)
1753                 goto config_qos;
1754
1755         status = be_cmd_link_status_query(adapter, &link_speed,
1756                                           &link_status, 0);
1757         if (status)
1758                 goto err;
1759
1760         if (!link_status) {
1761                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1762                 status = -ENETDOWN;
1763                 goto err;
1764         }
1765
1766         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1767                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1768                         link_speed);
1769                 status = -EINVAL;
1770                 goto err;
1771         }
1772
1773         /* On Skyhawk the QoS setting must be specified only as a % of link speed */
1774         percent_rate = link_speed / 100;
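             /* e.g. on a 10000 Mbps link percent_rate is 100, so Skyhawk only
              * accepts max_tx_rate values that are multiples of 100 Mbps
              */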
1775         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1776                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1777                         percent_rate);
1778                 status = -EINVAL;
1779                 goto err;
1780         }
1781
1782 config_qos:
1783         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1784         if (status)
1785                 goto err;
1786
1787         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1788         return 0;
1789
1790 err:
1791         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1792                 max_tx_rate, vf);
1793         return be_cmd_status(status);
1794 }
1795
1796 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1797                                 int link_state)
1798 {
1799         struct be_adapter *adapter = netdev_priv(netdev);
1800         int status;
1801
1802         if (!sriov_enabled(adapter))
1803                 return -EPERM;
1804
1805         if (vf >= adapter->num_vfs)
1806                 return -EINVAL;
1807
1808         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1809         if (status) {
1810                 dev_err(&adapter->pdev->dev,
1811                         "Link state change on VF %d failed: %#x\n", vf, status);
1812                 return be_cmd_status(status);
1813         }
1814
1815         adapter->vf_cfg[vf].plink_tracking = link_state;
1816
1817         return 0;
1818 }
1819
1820 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1821 {
1822         struct be_adapter *adapter = netdev_priv(netdev);
1823         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1824         u8 spoofchk;
1825         int status;
1826
1827         if (!sriov_enabled(adapter))
1828                 return -EPERM;
1829
1830         if (vf >= adapter->num_vfs)
1831                 return -EINVAL;
1832
1833         if (BEx_chip(adapter))
1834                 return -EOPNOTSUPP;
1835
1836         if (enable == vf_cfg->spoofchk)
1837                 return 0;
1838
1839         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1840
1841         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1842                                        0, spoofchk);
1843         if (status) {
1844                 dev_err(&adapter->pdev->dev,
1845                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1846                 return be_cmd_status(status);
1847         }
1848
1849         vf_cfg->spoofchk = enable;
1850         return 0;
1851 }
1852
1853 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1854                           ulong now)
1855 {
1856         aic->rx_pkts_prev = rx_pkts;
1857         aic->tx_reqs_prev = tx_pkts;
1858         aic->jiffies = now;
1859 }
1860
1861 static int be_get_new_eqd(struct be_eq_obj *eqo)
1862 {
1863         struct be_adapter *adapter = eqo->adapter;
1864         int eqd, start;
1865         struct be_aic_obj *aic;
1866         struct be_rx_obj *rxo;
1867         struct be_tx_obj *txo;
1868         u64 rx_pkts = 0, tx_pkts = 0;
1869         ulong now;
1870         u32 pps, delta;
1871         int i;
1872
1873         aic = &adapter->aic_obj[eqo->idx];
1874         if (!aic->enable) {
1875                 if (aic->jiffies)
1876                         aic->jiffies = 0;
1877                 eqd = aic->et_eqd;
1878                 return eqd;
1879         }
1880
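             /* The fetch_begin/retry loops below take a consistent snapshot of
              * the 64-bit per-queue counters (also on 32-bit hosts), retrying
              * if a writer updated the stats while they were being read.
              */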
1881         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1882                 do {
1883                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1884                         rx_pkts += rxo->stats.rx_pkts;
1885                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1886         }
1887
1888         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1889                 do {
1890                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1891                         tx_pkts += txo->stats.tx_reqs;
1892                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1893         }
1894
1895         /* Skip if the counters wrapped around or this is the first calculation */
1896         now = jiffies;
1897         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1898             rx_pkts < aic->rx_pkts_prev ||
1899             tx_pkts < aic->tx_reqs_prev) {
1900                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1901                 return aic->prev_eqd;
1902         }
1903
1904         delta = jiffies_to_msecs(now - aic->jiffies);
1905         if (delta == 0)
1906                 return aic->prev_eqd;
1907
1908         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1909                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1910         eqd = (pps / 15000) << 2;
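             /* e.g. a combined rate of 150000 pkts/s gives (150000 / 15000) << 2
              * = 40; values below 8 are forced to 0 before clamping the result
              * to the [min_eqd, max_eqd] range configured for this EQ
              */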
1911
1912         if (eqd < 8)
1913                 eqd = 0;
1914         eqd = min_t(u32, eqd, aic->max_eqd);
1915         eqd = max_t(u32, eqd, aic->min_eqd);
1916
1917         be_aic_update(aic, rx_pkts, tx_pkts, now);
1918
1919         return eqd;
1920 }
1921
1922 /* For Skyhawk-R only */
1923 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1924 {
1925         struct be_adapter *adapter = eqo->adapter;
1926         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1927         ulong now = jiffies;
1928         int eqd;
1929         u32 mult_enc;
1930
1931         if (!aic->enable)
1932                 return 0;
1933
1934         if (jiffies_to_msecs(now - aic->jiffies) < 1)
1935                 eqd = aic->prev_eqd;
1936         else
1937                 eqd = be_get_new_eqd(eqo);
1938
1939         if (eqd > 100)
1940                 mult_enc = R2I_DLY_ENC_1;
1941         else if (eqd > 60)
1942                 mult_enc = R2I_DLY_ENC_2;
1943         else if (eqd > 20)
1944                 mult_enc = R2I_DLY_ENC_3;
1945         else
1946                 mult_enc = R2I_DLY_ENC_0;
1947
1948         aic->prev_eqd = eqd;
1949
1950         return mult_enc;
1951 }
1952
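     /* Recompute the adaptive interrupt delay for every EQ and push only the
      * entries that actually changed (or all of them when force_update is set)
      * to the FW in a single be_cmd_modify_eqd() call.
      */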
1953 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1954 {
1955         struct be_set_eqd set_eqd[MAX_EVT_QS];
1956         struct be_aic_obj *aic;
1957         struct be_eq_obj *eqo;
1958         int i, num = 0, eqd;
1959
1960         for_all_evt_queues(adapter, eqo, i) {
1961                 aic = &adapter->aic_obj[eqo->idx];
1962                 eqd = be_get_new_eqd(eqo);
1963                 if (force_update || eqd != aic->prev_eqd) {
1964                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
1965                         set_eqd[num].eq_id = eqo->q.id;
1966                         aic->prev_eqd = eqd;
1967                         num++;
1968                 }
1969         }
1970
1971         if (num)
1972                 be_cmd_modify_eqd(adapter, set_eqd, num);
1973 }
1974
1975 static void be_rx_stats_update(struct be_rx_obj *rxo,
1976                                struct be_rx_compl_info *rxcp)
1977 {
1978         struct be_rx_stats *stats = rx_stats(rxo);
1979
1980         u64_stats_update_begin(&stats->sync);
1981         stats->rx_compl++;
1982         stats->rx_bytes += rxcp->pkt_size;
1983         stats->rx_pkts++;
1984         if (rxcp->tunneled)
1985                 stats->rx_vxlan_offload_pkts++;
1986         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1987                 stats->rx_mcast_pkts++;
1988         if (rxcp->err)
1989                 stats->rx_compl_err++;
1990         u64_stats_update_end(&stats->sync);
1991 }
1992
1993 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1994 {
1995         /* L4 checksum is not reliable for non-TCP/UDP packets.
1996          * Also ignore ipcksm for IPv6 pkts, since IPv6 has no header checksum.
1997          */
1998         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
1999                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2000 }
2001
2002 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2003 {
2004         struct be_adapter *adapter = rxo->adapter;
2005         struct be_rx_page_info *rx_page_info;
2006         struct be_queue_info *rxq = &rxo->q;
2007         u32 frag_idx = rxq->tail;
2008
2009         rx_page_info = &rxo->page_info_tbl[frag_idx];
2010         BUG_ON(!rx_page_info->page);
2011
2012         if (rx_page_info->last_frag) {
2013                 dma_unmap_page(&adapter->pdev->dev,
2014                                dma_unmap_addr(rx_page_info, bus),
2015                                adapter->big_page_size, DMA_FROM_DEVICE);
2016                 rx_page_info->last_frag = false;
2017         } else {
2018                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2019                                         dma_unmap_addr(rx_page_info, bus),
2020                                         rx_frag_size, DMA_FROM_DEVICE);
2021         }
2022
2023         queue_tail_inc(rxq);
2024         atomic_dec(&rxq->used);
2025         return rx_page_info;
2026 }
2027
2028 /* Throw away the data in the Rx completion */
2029 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2030                                 struct be_rx_compl_info *rxcp)
2031 {
2032         struct be_rx_page_info *page_info;
2033         u16 i, num_rcvd = rxcp->num_rcvd;
2034
2035         for (i = 0; i < num_rcvd; i++) {
2036                 page_info = get_rx_page_info(rxo);
2037                 put_page(page_info->page);
2038                 memset(page_info, 0, sizeof(*page_info));
2039         }
2040 }
2041
2042 /*
2043  * skb_fill_rx_data forms a complete skb for an ether frame
2044  * indicated by rxcp.
2045  */
2046 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2047                              struct be_rx_compl_info *rxcp)
2048 {
2049         struct be_rx_page_info *page_info;
2050         u16 i, j;
2051         u16 hdr_len, curr_frag_len, remaining;
2052         u8 *start;
2053
2054         page_info = get_rx_page_info(rxo);
2055         start = page_address(page_info->page) + page_info->page_offset;
2056         prefetch(start);
2057
2058         /* Copy data in the first descriptor of this completion */
2059         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2060
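             /* Copy-break: frames up to BE_HDR_LEN are copied entirely into the
              * skb linear area and the page is released; larger frames copy only
              * the Ethernet header and attach the remainder as page fragments.
              */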
2061         skb->len = curr_frag_len;
2062         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2063                 memcpy(skb->data, start, curr_frag_len);
2064                 /* Complete packet has now been moved to data */
2065                 put_page(page_info->page);
2066                 skb->data_len = 0;
2067                 skb->tail += curr_frag_len;
2068         } else {
2069                 hdr_len = ETH_HLEN;
2070                 memcpy(skb->data, start, hdr_len);
2071                 skb_shinfo(skb)->nr_frags = 1;
2072                 skb_frag_set_page(skb, 0, page_info->page);
2073                 skb_shinfo(skb)->frags[0].page_offset =
2074                                         page_info->page_offset + hdr_len;
2075                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2076                                   curr_frag_len - hdr_len);
2077                 skb->data_len = curr_frag_len - hdr_len;
2078                 skb->truesize += rx_frag_size;
2079                 skb->tail += hdr_len;
2080         }
2081         page_info->page = NULL;
2082
2083         if (rxcp->pkt_size <= rx_frag_size) {
2084                 BUG_ON(rxcp->num_rcvd != 1);
2085                 return;
2086         }
2087
2088         /* More frags present for this completion */
2089         remaining = rxcp->pkt_size - curr_frag_len;
2090         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2091                 page_info = get_rx_page_info(rxo);
2092                 curr_frag_len = min(remaining, rx_frag_size);
2093
2094                 /* Coalesce all frags from the same physical page in one slot */
2095                 if (page_info->page_offset == 0) {
2096                         /* Fresh page */
2097                         j++;
2098                         skb_frag_set_page(skb, j, page_info->page);
2099                         skb_shinfo(skb)->frags[j].page_offset =
2100                                                         page_info->page_offset;
2101                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2102                         skb_shinfo(skb)->nr_frags++;
2103                 } else {
2104                         put_page(page_info->page);
2105                 }
2106
2107                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2108                 skb->len += curr_frag_len;
2109                 skb->data_len += curr_frag_len;
2110                 skb->truesize += rx_frag_size;
2111                 remaining -= curr_frag_len;
2112                 page_info->page = NULL;
2113         }
2114         BUG_ON(j > MAX_SKB_FRAGS);
2115 }
2116
2117 /* Process the RX completion indicated by rxcp when GRO is disabled */
2118 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2119                                 struct be_rx_compl_info *rxcp)
2120 {
2121         struct be_adapter *adapter = rxo->adapter;
2122         struct net_device *netdev = adapter->netdev;
2123         struct sk_buff *skb;
2124
2125         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2126         if (unlikely(!skb)) {
2127                 rx_stats(rxo)->rx_drops_no_skbs++;
2128                 be_rx_compl_discard(rxo, rxcp);
2129                 return;
2130         }
2131
2132         skb_fill_rx_data(rxo, skb, rxcp);
2133
2134         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2135                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2136         else
2137                 skb_checksum_none_assert(skb);
2138
2139         skb->protocol = eth_type_trans(skb, netdev);
2140         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2141         if (netdev->features & NETIF_F_RXHASH)
2142                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2143
2144         skb->csum_level = rxcp->tunneled;
2145         skb_mark_napi_id(skb, napi);
2146
2147         if (rxcp->vlanf)
2148                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2149
2150         netif_receive_skb(skb);
2151 }
2152
2153 /* Process the RX completion indicated by rxcp when GRO is enabled */
2154 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2155                                     struct napi_struct *napi,
2156                                     struct be_rx_compl_info *rxcp)
2157 {
2158         struct be_adapter *adapter = rxo->adapter;
2159         struct be_rx_page_info *page_info;
2160         struct sk_buff *skb = NULL;
2161         u16 remaining, curr_frag_len;
2162         u16 i, j;
2163
2164         skb = napi_get_frags(napi);
2165         if (!skb) {
2166                 be_rx_compl_discard(rxo, rxcp);
2167                 return;
2168         }
2169
2170         remaining = rxcp->pkt_size;
2171         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2172                 page_info = get_rx_page_info(rxo);
2173
2174                 curr_frag_len = min(remaining, rx_frag_size);
2175
2176                 /* Coalesce all frags from the same physical page in one slot */
2177                 if (i == 0 || page_info->page_offset == 0) {
2178                         /* First frag or Fresh page */
2179                         j++;
2180                         skb_frag_set_page(skb, j, page_info->page);
2181                         skb_shinfo(skb)->frags[j].page_offset =
2182                                                         page_info->page_offset;
2183                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2184                 } else {
2185                         put_page(page_info->page);
2186                 }
2187                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2188                 skb->truesize += rx_frag_size;
2189                 remaining -= curr_frag_len;
2190                 memset(page_info, 0, sizeof(*page_info));
2191         }
2192         BUG_ON(j > MAX_SKB_FRAGS);
2193
2194         skb_shinfo(skb)->nr_frags = j + 1;
2195         skb->len = rxcp->pkt_size;
2196         skb->data_len = rxcp->pkt_size;
2197         skb->ip_summed = CHECKSUM_UNNECESSARY;
2198         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2199         if (adapter->netdev->features & NETIF_F_RXHASH)
2200                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2201
2202         skb->csum_level = rxcp->tunneled;
2203
2204         if (rxcp->vlanf)
2205                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2206
2207         napi_gro_frags(napi);
2208 }
2209
2210 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2211                                  struct be_rx_compl_info *rxcp)
2212 {
2213         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2214         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2215         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2216         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2217         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2218         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2219         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2220         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2221         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2222         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2223         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2224         if (rxcp->vlanf) {
2225                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2226                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2227         }
2228         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2229         rxcp->tunneled =
2230                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2231 }
2232
2233 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2234                                  struct be_rx_compl_info *rxcp)
2235 {
2236         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2237         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2238         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2239         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2240         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2241         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2242         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2243         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2244         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2245         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2246         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2247         if (rxcp->vlanf) {
2248                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2249                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2250         }
2251         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2252         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2253 }
2254
2255 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2256 {
2257         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2258         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2259         struct be_adapter *adapter = rxo->adapter;
2260
2261         /* For checking the valid bit it is Ok to use either definition as the
2262          * valid bit is at the same position in both v0 and v1 Rx compl */
2263         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2264                 return NULL;
2265
2266         rmb();
2267         be_dws_le_to_cpu(compl, sizeof(*compl));
2268
2269         if (adapter->be3_native)
2270                 be_parse_rx_compl_v1(compl, rxcp);
2271         else
2272                 be_parse_rx_compl_v0(compl, rxcp);
2273
2274         if (rxcp->ip_frag)
2275                 rxcp->l4_csum = 0;
2276
2277         if (rxcp->vlanf) {
2278                 /* In QNQ modes, if qnq bit is not set, then the packet was
2279                  * tagged only with the transparent outer vlan-tag and must
2280                  * not be treated as a vlan packet by host
2281                  */
2282                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2283                         rxcp->vlanf = 0;
2284
2285                 if (!lancer_chip(adapter))
2286                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2287
2288                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2289                     !test_bit(rxcp->vlan_tag, adapter->vids))
2290                         rxcp->vlanf = 0;
2291         }
2292
2293         /* As the compl has been parsed, reset it; we won't touch it again */
2294         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2295
2296         queue_tail_inc(&rxo->cq);
2297         return rxcp;
2298 }
2299
2300 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2301 {
2302         u32 order = get_order(size);
2303
2304         if (order > 0)
2305                 gfp |= __GFP_COMP;
2306         return  alloc_pages(gfp, order);
2307 }
2308
2309 /*
2310  * Allocate a page, split it to fragments of size rx_frag_size and post as
2311  * receive buffers to BE
2312  */
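     /* Only the last fragment carved from a page records the page-level DMA
      * address (last_frag); get_rx_page_info() then unmaps the page exactly
      * once and merely syncs the other fragments for CPU access.
      */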
2313 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2314 {
2315         struct be_adapter *adapter = rxo->adapter;
2316         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2317         struct be_queue_info *rxq = &rxo->q;
2318         struct page *pagep = NULL;
2319         struct device *dev = &adapter->pdev->dev;
2320         struct be_eth_rx_d *rxd;
2321         u64 page_dmaaddr = 0, frag_dmaaddr;
2322         u32 posted, page_offset = 0, notify = 0;
2323
2324         page_info = &rxo->page_info_tbl[rxq->head];
2325         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2326                 if (!pagep) {
2327                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2328                         if (unlikely(!pagep)) {
2329                                 rx_stats(rxo)->rx_post_fail++;
2330                                 break;
2331                         }
2332                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2333                                                     adapter->big_page_size,
2334                                                     DMA_FROM_DEVICE);
2335                         if (dma_mapping_error(dev, page_dmaaddr)) {
2336                                 put_page(pagep);
2337                                 pagep = NULL;
2338                                 adapter->drv_stats.dma_map_errors++;
2339                                 break;
2340                         }
2341                         page_offset = 0;
2342                 } else {
2343                         get_page(pagep);
2344                         page_offset += rx_frag_size;
2345                 }
2346                 page_info->page_offset = page_offset;
2347                 page_info->page = pagep;
2348
2349                 rxd = queue_head_node(rxq);
2350                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2351                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2352                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2353
2354                 /* Any space left in the current big page for another frag? */
2355                 if ((page_offset + rx_frag_size + rx_frag_size) >
2356                                         adapter->big_page_size) {
2357                         pagep = NULL;
2358                         page_info->last_frag = true;
2359                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2360                 } else {
2361                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2362                 }
2363
2364                 prev_page_info = page_info;
2365                 queue_head_inc(rxq);
2366                 page_info = &rxo->page_info_tbl[rxq->head];
2367         }
2368
2369         /* Mark the last frag of a page when we break out of the above loop
2370          * with no more slots available in the RXQ
2371          */
2372         if (pagep) {
2373                 prev_page_info->last_frag = true;
2374                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2375         }
2376
2377         if (posted) {
2378                 atomic_add(posted, &rxq->used);
2379                 if (rxo->rx_post_starved)
2380                         rxo->rx_post_starved = false;
2381                 do {
2382                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2383                         be_rxq_notify(adapter, rxq->id, notify);
2384                         posted -= notify;
2385                 } while (posted);
2386         } else if (atomic_read(&rxq->used) == 0) {
2387                 /* Let be_worker replenish when memory is available */
2388                 rxo->rx_post_starved = true;
2389         }
2390 }
2391
2392 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2393 {
2394         struct be_queue_info *tx_cq = &txo->cq;
2395         struct be_tx_compl_info *txcp = &txo->txcp;
2396         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2397
2398         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2399                 return NULL;
2400
2401         /* Ensure load ordering of valid bit dword and other dwords below */
2402         rmb();
2403         be_dws_le_to_cpu(compl, sizeof(*compl));
2404
2405         txcp->status = GET_TX_COMPL_BITS(status, compl);
2406         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2407
2408         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2409         queue_tail_inc(tx_cq);
2410         return txcp;
2411 }
2412
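     /* Unmaps and frees the WRBs of one TX request, from the header WRB at
      * txq->tail up to last_index, and returns the number of WRBs reclaimed
      * so the caller can credit them back against txq->used.
      */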
2413 static u16 be_tx_compl_process(struct be_adapter *adapter,
2414                                struct be_tx_obj *txo, u16 last_index)
2415 {
2416         struct sk_buff **sent_skbs = txo->sent_skb_list;
2417         struct be_queue_info *txq = &txo->q;
2418         struct sk_buff *skb = NULL;
2419         bool unmap_skb_hdr = false;
2420         struct be_eth_wrb *wrb;
2421         u16 num_wrbs = 0;
2422         u32 frag_index;
2423
2424         do {
2425                 if (sent_skbs[txq->tail]) {
2426                         /* Free skb from prev req */
2427                         if (skb)
2428                                 dev_consume_skb_any(skb);
2429                         skb = sent_skbs[txq->tail];
2430                         sent_skbs[txq->tail] = NULL;
2431                         queue_tail_inc(txq);  /* skip hdr wrb */
2432                         num_wrbs++;
2433                         unmap_skb_hdr = true;
2434                 }
2435                 wrb = queue_tail_node(txq);
2436                 frag_index = txq->tail;
2437                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2438                               (unmap_skb_hdr && skb_headlen(skb)));
2439                 unmap_skb_hdr = false;
2440                 queue_tail_inc(txq);
2441                 num_wrbs++;
2442         } while (frag_index != last_index);
2443         dev_consume_skb_any(skb);
2444
2445         return num_wrbs;
2446 }
2447
2448 /* Drain the event queue and return the number of events consumed */
2449 static inline int events_get(struct be_eq_obj *eqo)
2450 {
2451         struct be_eq_entry *eqe;
2452         int num = 0;
2453
2454         do {
2455                 eqe = queue_tail_node(&eqo->q);
2456                 if (eqe->evt == 0)
2457                         break;
2458
2459                 rmb();
2460                 eqe->evt = 0;
2461                 num++;
2462                 queue_tail_inc(&eqo->q);
2463         } while (true);
2464
2465         return num;
2466 }
2467
2468 /* Leaves the EQ in disarmed state */
2469 static void be_eq_clean(struct be_eq_obj *eqo)
2470 {
2471         int num = events_get(eqo);
2472
2473         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2474 }
2475
2476 /* Free posted rx buffers that were not used */
2477 static void be_rxq_clean(struct be_rx_obj *rxo)
2478 {
2479         struct be_queue_info *rxq = &rxo->q;
2480         struct be_rx_page_info *page_info;
2481
2482         while (atomic_read(&rxq->used) > 0) {
2483                 page_info = get_rx_page_info(rxo);
2484                 put_page(page_info->page);
2485                 memset(page_info, 0, sizeof(*page_info));
2486         }
2487         BUG_ON(atomic_read(&rxq->used));
2488         rxq->tail = 0;
2489         rxq->head = 0;
2490 }
2491
2492 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2493 {
2494         struct be_queue_info *rx_cq = &rxo->cq;
2495         struct be_rx_compl_info *rxcp;
2496         struct be_adapter *adapter = rxo->adapter;
2497         int flush_wait = 0;
2498
2499         /* Consume pending rx completions.
2500          * Wait for the flush completion (identified by zero num_rcvd)
2501          * to arrive. Notify CQ even when there are no more CQ entries
2502          * for HW to flush partially coalesced CQ entries.
2503          * In Lancer, there is no need to wait for flush compl.
2504          */
2505         for (;;) {
2506                 rxcp = be_rx_compl_get(rxo);
2507                 if (!rxcp) {
2508                         if (lancer_chip(adapter))
2509                                 break;
2510
2511                         if (flush_wait++ > 50 ||
2512                             be_check_error(adapter,
2513                                            BE_ERROR_HW)) {
2514                                 dev_warn(&adapter->pdev->dev,
2515                                          "did not receive flush compl\n");
2516                                 break;
2517                         }
2518                         be_cq_notify(adapter, rx_cq->id, true, 0);
2519                         mdelay(1);
2520                 } else {
2521                         be_rx_compl_discard(rxo, rxcp);
2522                         be_cq_notify(adapter, rx_cq->id, false, 1);
2523                         if (rxcp->num_rcvd == 0)
2524                                 break;
2525                 }
2526         }
2527
2528         /* After cleanup, leave the CQ in unarmed state */
2529         be_cq_notify(adapter, rx_cq->id, false, 0);
2530 }
2531
2532 static void be_tx_compl_clean(struct be_adapter *adapter)
2533 {
2534         struct device *dev = &adapter->pdev->dev;
2535         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2536         struct be_tx_compl_info *txcp;
2537         struct be_queue_info *txq;
2538         u32 end_idx, notified_idx;
2539         struct be_tx_obj *txo;
2540         int i, pending_txqs;
2541
2542         /* Stop polling for compls when HW has been silent for 10ms */
2543         do {
2544                 pending_txqs = adapter->num_tx_qs;
2545
2546                 for_all_tx_queues(adapter, txo, i) {
2547                         cmpl = 0;
2548                         num_wrbs = 0;
2549                         txq = &txo->q;
2550                         while ((txcp = be_tx_compl_get(txo))) {
2551                                 num_wrbs +=
2552                                         be_tx_compl_process(adapter, txo,
2553                                                             txcp->end_index);
2554                                 cmpl++;
2555                         }
2556                         if (cmpl) {
2557                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2558                                 atomic_sub(num_wrbs, &txq->used);
2559                                 timeo = 0;
2560                         }
2561                         if (!be_is_tx_compl_pending(txo))
2562                                 pending_txqs--;
2563                 }
2564
2565                 if (pending_txqs == 0 || ++timeo > 10 ||
2566                     be_check_error(adapter, BE_ERROR_HW))
2567                         break;
2568
2569                 mdelay(1);
2570         } while (true);
2571
2572         /* Free enqueued TX that was never notified to HW */
2573         for_all_tx_queues(adapter, txo, i) {
2574                 txq = &txo->q;
2575
2576                 if (atomic_read(&txq->used)) {
2577                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2578                                  i, atomic_read(&txq->used));
2579                         notified_idx = txq->tail;
2580                         end_idx = txq->tail;
2581                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2582                                   txq->len);
2583                         /* Use the tx-compl process logic to handle requests
2584                          * that were not sent to the HW.
2585                          */
2586                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2587                         atomic_sub(num_wrbs, &txq->used);
2588                         BUG_ON(atomic_read(&txq->used));
2589                         txo->pend_wrb_cnt = 0;
2590                         /* Since hw was never notified of these requests,
2591                          * reset TXQ indices
2592                          */
2593                         txq->head = notified_idx;
2594                         txq->tail = notified_idx;
2595                 }
2596         }
2597 }
2598
2599 static void be_evt_queues_destroy(struct be_adapter *adapter)
2600 {
2601         struct be_eq_obj *eqo;
2602         int i;
2603
2604         for_all_evt_queues(adapter, eqo, i) {
2605                 if (eqo->q.created) {
2606                         be_eq_clean(eqo);
2607                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2608                         napi_hash_del(&eqo->napi);
2609                         netif_napi_del(&eqo->napi);
2610                         free_cpumask_var(eqo->affinity_mask);
2611                 }
2612                 be_queue_free(adapter, &eqo->q);
2613         }
2614 }
2615
2616 static int be_evt_queues_create(struct be_adapter *adapter)
2617 {
2618         struct be_queue_info *eq;
2619         struct be_eq_obj *eqo;
2620         struct be_aic_obj *aic;
2621         int i, rc;
2622
2623         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2624                                     adapter->cfg_num_qs);
2625
2626         for_all_evt_queues(adapter, eqo, i) {
2627                 int numa_node = dev_to_node(&adapter->pdev->dev);
2628
2629                 aic = &adapter->aic_obj[i];
2630                 eqo->adapter = adapter;
2631                 eqo->idx = i;
2632                 aic->max_eqd = BE_MAX_EQD;
2633                 aic->enable = true;
2634
2635                 eq = &eqo->q;
2636                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2637                                     sizeof(struct be_eq_entry));
2638                 if (rc)
2639                         return rc;
2640
2641                 rc = be_cmd_eq_create(adapter, eqo);
2642                 if (rc)
2643                         return rc;
2644
2645                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2646                         return -ENOMEM;
2647                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2648                                 eqo->affinity_mask);
2649                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2650                                BE_NAPI_WEIGHT);
2651         }
2652         return 0;
2653 }
2654
2655 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2656 {
2657         struct be_queue_info *q;
2658
2659         q = &adapter->mcc_obj.q;
2660         if (q->created)
2661                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2662         be_queue_free(adapter, q);
2663
2664         q = &adapter->mcc_obj.cq;
2665         if (q->created)
2666                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2667         be_queue_free(adapter, q);
2668 }
2669
2670 /* Must be called only after TX qs are created as MCC shares TX EQ */
2671 static int be_mcc_queues_create(struct be_adapter *adapter)
2672 {
2673         struct be_queue_info *q, *cq;
2674
2675         cq = &adapter->mcc_obj.cq;
2676         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2677                            sizeof(struct be_mcc_compl)))
2678                 goto err;
2679
2680         /* Use the default EQ for MCC completions */
2681         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2682                 goto mcc_cq_free;
2683
2684         q = &adapter->mcc_obj.q;
2685         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2686                 goto mcc_cq_destroy;
2687
2688         if (be_cmd_mccq_create(adapter, q, cq))
2689                 goto mcc_q_free;
2690
2691         return 0;
2692
2693 mcc_q_free:
2694         be_queue_free(adapter, q);
2695 mcc_cq_destroy:
2696         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2697 mcc_cq_free:
2698         be_queue_free(adapter, cq);
2699 err:
2700         return -1;
2701 }
2702
2703 static void be_tx_queues_destroy(struct be_adapter *adapter)
2704 {
2705         struct be_queue_info *q;
2706         struct be_tx_obj *txo;
2707         u8 i;
2708
2709         for_all_tx_queues(adapter, txo, i) {
2710                 q = &txo->q;
2711                 if (q->created)
2712                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2713                 be_queue_free(adapter, q);
2714
2715                 q = &txo->cq;
2716                 if (q->created)
2717                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2718                 be_queue_free(adapter, q);
2719         }
2720 }
2721
2722 static int be_tx_qs_create(struct be_adapter *adapter)
2723 {
2724         struct be_queue_info *cq;
2725         struct be_tx_obj *txo;
2726         struct be_eq_obj *eqo;
2727         int status, i;
2728
2729         adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
2730
2731         for_all_tx_queues(adapter, txo, i) {
2732                 cq = &txo->cq;
2733                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2734                                         sizeof(struct be_eth_tx_compl));
2735                 if (status)
2736                         return status;
2737
2738                 u64_stats_init(&txo->stats.sync);
2739                 u64_stats_init(&txo->stats.sync_compl);
2740
2741                 /* If num_evt_qs is less than num_tx_qs, then more than
2742                  * one txq shares an eq
2743                  */
2744                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2745                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2746                 if (status)
2747                         return status;
2748
2749                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2750                                         sizeof(struct be_eth_wrb));
2751                 if (status)
2752                         return status;
2753
2754                 status = be_cmd_txq_create(adapter, txo);
2755                 if (status)
2756                         return status;
2757
2758                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2759                                     eqo->idx);
2760         }
2761
2762         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2763                  adapter->num_tx_qs);
2764         return 0;
2765 }
2766
2767 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2768 {
2769         struct be_queue_info *q;
2770         struct be_rx_obj *rxo;
2771         int i;
2772
2773         for_all_rx_queues(adapter, rxo, i) {
2774                 q = &rxo->cq;
2775                 if (q->created)
2776                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2777                 be_queue_free(adapter, q);
2778         }
2779 }
2780
2781 static int be_rx_cqs_create(struct be_adapter *adapter)
2782 {
2783         struct be_queue_info *eq, *cq;
2784         struct be_rx_obj *rxo;
2785         int rc, i;
2786
2787         /* We can create as many RSS rings as there are EQs. */
2788         adapter->num_rss_qs = adapter->num_evt_qs;
2789
2790         /* We'll use RSS only if at least 2 RSS rings are supported. */
2791         if (adapter->num_rss_qs <= 1)
2792                 adapter->num_rss_qs = 0;
2793
2794         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2795
2796         /* When the interface is not capable of RSS rings (and there is no
2797          * need to create a default RXQ) we'll still need one RXQ
2798          */
2799         if (adapter->num_rx_qs == 0)
2800                 adapter->num_rx_qs = 1;
2801
2802         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
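             /* With the default rx_frag_size of 2048 and 4 KiB pages this is
              * just PAGE_SIZE, so each page posted by be_post_rx_frags() is
              * carved into two receive fragments.
              */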
2803         for_all_rx_queues(adapter, rxo, i) {
2804                 rxo->adapter = adapter;
2805                 cq = &rxo->cq;
2806                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2807                                     sizeof(struct be_eth_rx_compl));
2808                 if (rc)
2809                         return rc;
2810
2811                 u64_stats_init(&rxo->stats.sync);
2812                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2813                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2814                 if (rc)
2815                         return rc;
2816         }
2817
2818         dev_info(&adapter->pdev->dev,
2819                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2820         return 0;
2821 }
2822
2823 static irqreturn_t be_intx(int irq, void *dev)
2824 {
2825         struct be_eq_obj *eqo = dev;
2826         struct be_adapter *adapter = eqo->adapter;
2827         int num_evts = 0;
2828
2829         /* IRQ is not expected when NAPI is scheduled as the EQ
2830          * will not be armed.
2831          * But, this can happen on Lancer INTx where it takes
2832          * a while to de-assert INTx or in BE2 where occasionally
2833          * an interrupt may be raised even when EQ is unarmed.
2834          * If NAPI is already scheduled, then counting & notifying
2835          * events will orphan them.
2836          */
2837         if (napi_schedule_prep(&eqo->napi)) {
2838                 num_evts = events_get(eqo);
2839                 __napi_schedule(&eqo->napi);
2840                 if (num_evts)
2841                         eqo->spurious_intr = 0;
2842         }
2843         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2844
2845         /* Return IRQ_HANDLED only for the first spurious intr
2846          * after a valid intr to stop the kernel from branding
2847          * this irq as a bad one!
2848          */
2849         if (num_evts || eqo->spurious_intr++ == 0)
2850                 return IRQ_HANDLED;
2851         else
2852                 return IRQ_NONE;
2853 }
2854
2855 static irqreturn_t be_msix(int irq, void *dev)
2856 {
2857         struct be_eq_obj *eqo = dev;
2858
2859         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2860         napi_schedule(&eqo->napi);
2861         return IRQ_HANDLED;
2862 }
2863
2864 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2865 {
2866         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2867 }
2868
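/* Reap up to @budget RX completions from this rx-obj's completion queue.
 * Flush completions (no data), partial-DMA completions (Lancer B0) and
 * packets mis-routed by imperfect promiscuous filtering (BE) are skipped
 * or discarded; the rest go through GRO or the regular RX path depending
 * on the polling mode. Finally the CQ is re-armed and the RXQ is refilled
 * with fragments when it has drained below the refill watermark and is
 * not in the post_starved state.
 */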
2869 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2870                          int budget, int polling)
2871 {
2872         struct be_adapter *adapter = rxo->adapter;
2873         struct be_queue_info *rx_cq = &rxo->cq;
2874         struct be_rx_compl_info *rxcp;
2875         u32 work_done;
2876         u32 frags_consumed = 0;
2877
2878         for (work_done = 0; work_done < budget; work_done++) {
2879                 rxcp = be_rx_compl_get(rxo);
2880                 if (!rxcp)
2881                         break;
2882
2883                 /* Is it a flush compl that has no data */
2884                 if (unlikely(rxcp->num_rcvd == 0))
2885                         goto loop_continue;
2886
2887                 /* Discard compl with partial DMA Lancer B0 */
2888                 if (unlikely(!rxcp->pkt_size)) {
2889                         be_rx_compl_discard(rxo, rxcp);
2890                         goto loop_continue;
2891                 }
2892
2893                 /* On BE drop pkts that arrive due to imperfect filtering in
2894                  * promiscuous mode on some SKUs
2895                  */
2896                 if (unlikely(rxcp->port != adapter->port_num &&
2897                              !lancer_chip(adapter))) {
2898                         be_rx_compl_discard(rxo, rxcp);
2899                         goto loop_continue;
2900                 }
2901
2902                 /* Don't do gro when we're busy_polling */
2903                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2904                         be_rx_compl_process_gro(rxo, napi, rxcp);
2905                 else
2906                         be_rx_compl_process(rxo, napi, rxcp);
2907
2908 loop_continue:
2909                 frags_consumed += rxcp->num_rcvd;
2910                 be_rx_stats_update(rxo, rxcp);
2911         }
2912
2913         if (work_done) {
2914                 be_cq_notify(adapter, rx_cq->id, true, work_done);
2915
2916                 /* When an rx-obj gets into post_starved state, just
2917                  * let be_worker do the posting.
2918                  */
2919                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2920                     !rxo->rx_post_starved)
2921                         be_post_rx_frags(rxo, GFP_ATOMIC,
2922                                          max_t(u32, MAX_RX_POST,
2923                                                frags_consumed));
2924         }
2925
2926         return work_done;
2927 }
2928
2929 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2930 {
2931         switch (status) {
2932         case BE_TX_COMP_HDR_PARSE_ERR:
2933                 tx_stats(txo)->tx_hdr_parse_err++;
2934                 break;
2935         case BE_TX_COMP_NDMA_ERR:
2936                 tx_stats(txo)->tx_dma_err++;
2937                 break;
2938         case BE_TX_COMP_ACL_ERR:
2939                 tx_stats(txo)->tx_spoof_check_err++;
2940                 break;
2941         }
2942 }
2943
2944 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2945 {
2946         switch (status) {
2947         case LANCER_TX_COMP_LSO_ERR:
2948                 tx_stats(txo)->tx_tso_err++;
2949                 break;
2950         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2951         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2952                 tx_stats(txo)->tx_spoof_check_err++;
2953                 break;
2954         case LANCER_TX_COMP_QINQ_ERR:
2955                 tx_stats(txo)->tx_qinq_err++;
2956                 break;
2957         case LANCER_TX_COMP_PARITY_ERR:
2958                 tx_stats(txo)->tx_internal_parity_err++;
2959                 break;
2960         case LANCER_TX_COMP_DMA_ERR:
2961                 tx_stats(txo)->tx_dma_err++;
2962                 break;
2963         }
2964 }
2965
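/* Reap TX completions for this tx-obj: free the completed wrbs, account
 * chip-specific error stats, re-arm the CQ and wake the corresponding
 * netdev subqueue if it was stopped for lack of wrbs.
 */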
2966 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2967                           int idx)
2968 {
2969         int num_wrbs = 0, work_done = 0;
2970         struct be_tx_compl_info *txcp;
2971
2972         while ((txcp = be_tx_compl_get(txo))) {
2973                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2974                 work_done++;
2975
2976                 if (txcp->status) {
2977                         if (lancer_chip(adapter))
2978                                 lancer_update_tx_err(txo, txcp->status);
2979                         else
2980                                 be_update_tx_err(txo, txcp->status);
2981                 }
2982         }
2983
2984         if (work_done) {
2985                 be_cq_notify(adapter, txo->cq.id, true, work_done);
2986                 atomic_sub(num_wrbs, &txo->q.used);
2987
2988                 /* As Tx wrbs have been freed up, wake up netdev queue
2989                  * if it was stopped due to lack of tx wrbs. */
2990                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2991                     be_can_txq_wake(txo)) {
2992                         netif_wake_subqueue(adapter->netdev, idx);
2993                 }
2994
2995                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
2996                 tx_stats(txo)->tx_compl += work_done;
2997                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
2998         }
2999 }
3000
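/* The NAPI poll path and the busy-poll path must not process an EQ's RX
 * queues concurrently. eqo->state records which path currently owns the
 * EQ; a contending path sets a *_YIELD flag and backs off instead of
 * spinning.
 */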
3001 #ifdef CONFIG_NET_RX_BUSY_POLL
3002 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3003 {
3004         bool status = true;
3005
3006         spin_lock(&eqo->lock); /* BH is already disabled */
3007         if (eqo->state & BE_EQ_LOCKED) {
3008                 WARN_ON(eqo->state & BE_EQ_NAPI);
3009                 eqo->state |= BE_EQ_NAPI_YIELD;
3010                 status = false;
3011         } else {
3012                 eqo->state = BE_EQ_NAPI;
3013         }
3014         spin_unlock(&eqo->lock);
3015         return status;
3016 }
3017
3018 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3019 {
3020         spin_lock(&eqo->lock); /* BH is already disabled */
3021
3022         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3023         eqo->state = BE_EQ_IDLE;
3024
3025         spin_unlock(&eqo->lock);
3026 }
3027
3028 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3029 {
3030         bool status = true;
3031
3032         spin_lock_bh(&eqo->lock);
3033         if (eqo->state & BE_EQ_LOCKED) {
3034                 eqo->state |= BE_EQ_POLL_YIELD;
3035                 status = false;
3036         } else {
3037                 eqo->state |= BE_EQ_POLL;
3038         }
3039         spin_unlock_bh(&eqo->lock);
3040         return status;
3041 }
3042
3043 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3044 {
3045         spin_lock_bh(&eqo->lock);
3046
3047         WARN_ON(eqo->state & (BE_EQ_NAPI));
3048         eqo->state = BE_EQ_IDLE;
3049
3050         spin_unlock_bh(&eqo->lock);
3051 }
3052
3053 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3054 {
3055         spin_lock_init(&eqo->lock);
3056         eqo->state = BE_EQ_IDLE;
3057 }
3058
3059 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3060 {
3061         local_bh_disable();
3062
3063         /* It's enough to just acquire napi lock on the eqo to stop
3064          * be_busy_poll() from processing any queues.
3065          */
3066         while (!be_lock_napi(eqo))
3067                 mdelay(1);
3068
3069         local_bh_enable();
3070 }
3071
3072 #else /* CONFIG_NET_RX_BUSY_POLL */
3073
3074 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3075 {
3076         return true;
3077 }
3078
3079 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3080 {
3081 }
3082
3083 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3084 {
3085         return false;
3086 }
3087
3088 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3089 {
3090 }
3091
3092 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3093 {
3094 }
3095
3096 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3097 {
3098 }
3099 #endif /* CONFIG_NET_RX_BUSY_POLL */
3100
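/* NAPI poll handler (one per EQ): count pending events, process TX
 * completions for all TX queues on this EQ, then RX completions (if the
 * NAPI/busy-poll lock could be taken) and MCC completions on the MCC EQ.
 * If less than the budget was consumed, complete NAPI and re-arm the EQ;
 * otherwise only ack the events so polling continues.
 */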
3101 int be_poll(struct napi_struct *napi, int budget)
3102 {
3103         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3104         struct be_adapter *adapter = eqo->adapter;
3105         int max_work = 0, work, i, num_evts;
3106         struct be_rx_obj *rxo;
3107         struct be_tx_obj *txo;
3108         u32 mult_enc = 0;
3109
3110         num_evts = events_get(eqo);
3111
3112         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3113                 be_process_tx(adapter, txo, i);
3114
3115         if (be_lock_napi(eqo)) {
3116                 /* This loop will iterate twice for EQ0 in which
3117                  * completions of the last RXQ (default one) are also processed.
3118                  * For other EQs the loop iterates only once
3119                  */
3120                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3121                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3122                         max_work = max(work, max_work);
3123                 }
3124                 be_unlock_napi(eqo);
3125         } else {
3126                 max_work = budget;
3127         }
3128
3129         if (is_mcc_eqo(eqo))
3130                 be_process_mcc(adapter);
3131
3132         if (max_work < budget) {
3133                 napi_complete(napi);
3134
3135                 /* Skyhawk EQ_DB has a provision to set the rearm-to-interrupt
3136                  * delay via a delay multiplier encoding value
3137                  */
3138                 if (skyhawk_chip(adapter))
3139                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3140
3141                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3142                              mult_enc);
3143         } else {
3144                 /* As we'll continue in polling mode, count and clear events */
3145                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3146         }
3147         return max_work;
3148 }
3149
3150 #ifdef CONFIG_NET_RX_BUSY_POLL
3151 static int be_busy_poll(struct napi_struct *napi)
3152 {
3153         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3154         struct be_adapter *adapter = eqo->adapter;
3155         struct be_rx_obj *rxo;
3156         int i, work = 0;
3157
3158         if (!be_lock_busy_poll(eqo))
3159                 return LL_FLUSH_BUSY;
3160
3161         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3162                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3163                 if (work)
3164                         break;
3165         }
3166
3167         be_unlock_busy_poll(eqo);
3168         return work;
3169 }
3170 #endif
3171
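/* Check for unrecoverable errors. On Lancer the SLIPORT status/error
 * registers are examined (a FW-reset signature is logged as informational
 * only). On other chips the UE status CSRs are read and any unmasked
 * error bits are decoded and logged.
 */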
3172 void be_detect_error(struct be_adapter *adapter)
3173 {
3174         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3175         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3176         u32 i;
3177         struct device *dev = &adapter->pdev->dev;
3178
3179         if (be_check_error(adapter, BE_ERROR_HW))
3180                 return;
3181
3182         if (lancer_chip(adapter)) {
3183                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3184                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3185                         be_set_error(adapter, BE_ERROR_UE);
3186                         sliport_err1 = ioread32(adapter->db +
3187                                                 SLIPORT_ERROR1_OFFSET);
3188                         sliport_err2 = ioread32(adapter->db +
3189                                                 SLIPORT_ERROR2_OFFSET);
3190                         /* Do not log error messages if it's a FW reset */
3191                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3192                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3193                                 dev_info(dev, "Firmware update in progress\n");
3194                         } else {
3195                                 dev_err(dev, "Error detected in the card\n");
3196                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3197                                         sliport_status);
3198                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3199                                         sliport_err1);
3200                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3201                                         sliport_err2);
3202                         }
3203                 }
3204         } else {
3205                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3206                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3207                 ue_lo_mask = ioread32(adapter->pcicfg +
3208                                       PCICFG_UE_STATUS_LOW_MASK);
3209                 ue_hi_mask = ioread32(adapter->pcicfg +
3210                                       PCICFG_UE_STATUS_HI_MASK);
3211
3212                 ue_lo = (ue_lo & ~ue_lo_mask);
3213                 ue_hi = (ue_hi & ~ue_hi_mask);
3214
3215                 /* On certain platforms BE hardware can indicate spurious UEs.
3216                  * In case of a real UE the HW will stop working on its own anyway,
3217                  * so don't set hw_error merely because a UE was detected.
3218                  */
3219
3220                 if (ue_lo || ue_hi) {
3221                         dev_err(dev,
3222                                 "Unrecoverable Error detected in the adapter\n");
3223                         dev_err(dev, "Please reboot server to recover\n");
3224                         if (skyhawk_chip(adapter))
3225                                 be_set_error(adapter, BE_ERROR_UE);
3226
3227                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3228                                 if (ue_lo & 1)
3229                                         dev_err(dev, "UE: %s bit set\n",
3230                                                 ue_status_low_desc[i]);
3231                         }
3232                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3233                                 if (ue_hi & 1)
3234                                         dev_err(dev, "UE: %s bit set\n",
3235                                                 ue_status_hi_desc[i]);
3236                         }
3237                 }
3238         }
3239 }
3240
3241 static void be_msix_disable(struct be_adapter *adapter)
3242 {
3243         if (msix_enabled(adapter)) {
3244                 pci_disable_msix(adapter->pdev);
3245                 adapter->num_msix_vec = 0;
3246                 adapter->num_msix_roce_vec = 0;
3247         }
3248 }
3249
3250 static int be_msix_enable(struct be_adapter *adapter)
3251 {
3252         int i, num_vec;
3253         struct device *dev = &adapter->pdev->dev;
3254
3255         /* If RoCE is supported, program the max number of NIC vectors that
3256          * may be configured via set-channels, along with vectors needed for
3257          * RoCe. Else, just program the number we'll use initially.
3258          * RoCE. Else, just program the number we'll use initially.
3259         if (be_roce_supported(adapter))
3260                 num_vec = min_t(int, 2 * be_max_eqs(adapter),
3261                                 2 * num_online_cpus());
3262         else
3263                 num_vec = adapter->cfg_num_qs;
3264
3265         for (i = 0; i < num_vec; i++)
3266                 adapter->msix_entries[i].entry = i;
3267
3268         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3269                                         MIN_MSIX_VECTORS, num_vec);
3270         if (num_vec < 0)
3271                 goto fail;
3272
3273         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3274                 adapter->num_msix_roce_vec = num_vec / 2;
3275                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3276                          adapter->num_msix_roce_vec);
3277         }
3278
3279         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3280
3281         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3282                  adapter->num_msix_vec);
3283         return 0;
3284
3285 fail:
3286         dev_warn(dev, "MSIx enable failed\n");
3287
3288         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3289         if (be_virtfn(adapter))
3290                 return num_vec;
3291         return 0;
3292 }
3293
3294 static inline int be_msix_vec_get(struct be_adapter *adapter,
3295                                   struct be_eq_obj *eqo)
3296 {
3297         return adapter->msix_entries[eqo->msix_idx].vector;
3298 }
3299
3300 static int be_msix_register(struct be_adapter *adapter)
3301 {
3302         struct net_device *netdev = adapter->netdev;
3303         struct be_eq_obj *eqo;
3304         int status, i, vec;
3305
3306         for_all_evt_queues(adapter, eqo, i) {
3307                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3308                 vec = be_msix_vec_get(adapter, eqo);
3309                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3310                 if (status)
3311                         goto err_msix;
3312
3313                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3314         }
3315
3316         return 0;
3317 err_msix:
3318         for (i--; i >= 0; i--) {
3319                 eqo = &adapter->eq_obj[i];
3320                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3321         }
3322         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3323                  status);
3324         be_msix_disable(adapter);
3325         return status;
3326 }
3327
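/* Register the adapter's interrupt handler(s): one MSI-X vector per EQ
 * when MSI-X is enabled, otherwise a single shared INTx handler on EQ0.
 * VFs must use MSI-X, so MSI-X registration failure is fatal for them.
 */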
3328 static int be_irq_register(struct be_adapter *adapter)
3329 {
3330         struct net_device *netdev = adapter->netdev;
3331         int status;
3332
3333         if (msix_enabled(adapter)) {
3334                 status = be_msix_register(adapter);
3335                 if (status == 0)
3336                         goto done;
3337                 /* INTx is not supported for VF */
3338                 if (be_virtfn(adapter))
3339                         return status;
3340         }
3341
3342         /* INTx: only the first EQ is used */
3343         netdev->irq = adapter->pdev->irq;
3344         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3345                              &adapter->eq_obj[0]);
3346         if (status) {
3347                 dev_err(&adapter->pdev->dev,
3348                         "INTx request IRQ failed - err %d\n", status);
3349                 return status;
3350         }
3351 done:
3352         adapter->isr_registered = true;
3353         return 0;
3354 }
3355
3356 static void be_irq_unregister(struct be_adapter *adapter)
3357 {
3358         struct net_device *netdev = adapter->netdev;
3359         struct be_eq_obj *eqo;
3360         int i, vec;
3361
3362         if (!adapter->isr_registered)
3363                 return;
3364
3365         /* INTx */
3366         if (!msix_enabled(adapter)) {
3367                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3368                 goto done;
3369         }
3370
3371         /* MSIx */
3372         for_all_evt_queues(adapter, eqo, i) {
3373                 vec = be_msix_vec_get(adapter, eqo);
3374                 irq_set_affinity_hint(vec, NULL);
3375                 free_irq(vec, eqo);
3376         }
3377
3378 done:
3379         adapter->isr_registered = false;
3380 }
3381
3382 static void be_rx_qs_destroy(struct be_adapter *adapter)
3383 {
3384         struct rss_info *rss = &adapter->rss_info;
3385         struct be_queue_info *q;
3386         struct be_rx_obj *rxo;
3387         int i;
3388
3389         for_all_rx_queues(adapter, rxo, i) {
3390                 q = &rxo->q;
3391                 if (q->created) {
3392                         /* If RXQs are destroyed while in an "out of buffer"
3393                          * state, there is a possibility of an HW stall on
3394                          * Lancer. So, post 64 buffers to each queue to relieve
3395                          * the "out of buffer" condition.
3396                          * Make sure there's space in the RXQ before posting.
3397                          */
3398                         if (lancer_chip(adapter)) {
3399                                 be_rx_cq_clean(rxo);
3400                                 if (atomic_read(&q->used) == 0)
3401                                         be_post_rx_frags(rxo, GFP_KERNEL,
3402                                                          MAX_RX_POST);
3403                         }
3404
3405                         be_cmd_rxq_destroy(adapter, q);
3406                         be_rx_cq_clean(rxo);
3407                         be_rxq_clean(rxo);
3408                 }
3409                 be_queue_free(adapter, q);
3410         }
3411
3412         if (rss->rss_flags) {
3413                 rss->rss_flags = RSS_ENABLE_NONE;
3414                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3415                                   128, rss->rss_hkey);
3416         }
3417 }
3418
3419 static void be_disable_if_filters(struct be_adapter *adapter)
3420 {
3421         be_cmd_pmac_del(adapter, adapter->if_handle,
3422                         adapter->pmac_id[0], 0);
3423
3424         be_clear_uc_list(adapter);
3425
3426         /* The IFACE flags are enabled in the open path and cleared
3427          * in the close path. When a VF gets detached from the host and
3428          * assigned to a VM the following happens:
3429          *      - VF's IFACE flags get cleared in the detach path
3430          *      - IFACE create is issued by the VF in the attach path
3431          * Due to a bug in the BE3/Skyhawk-R FW
3432          * (Lancer FW doesn't have the bug), the IFACE capability flags
3433          * specified along with the IFACE create cmd issued by a VF are not
3434          * honoured by FW.  As a consequence, if a *new* driver
3435          * (that enables/disables IFACE flags in open/close)
3436          * is loaded in the host and an *old* driver is used by a VM/VF,
3437          * the IFACE gets created *without* the needed flags.
3438          * To avoid this, disable RX-filter flags only for Lancer.
3439          */
3440         if (lancer_chip(adapter)) {
3441                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3442                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3443         }
3444 }
3445
3446 static int be_close(struct net_device *netdev)
3447 {
3448         struct be_adapter *adapter = netdev_priv(netdev);
3449         struct be_eq_obj *eqo;
3450         int i;
3451
3452         /* This protection is needed as be_close() may be called even when the
3453          * adapter is in cleared state (after eeh perm failure)
3454          */
3455         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3456                 return 0;
3457
3458         be_disable_if_filters(adapter);
3459
3460         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3461                 for_all_evt_queues(adapter, eqo, i) {
3462                         napi_disable(&eqo->napi);
3463                         be_disable_busy_poll(eqo);
3464                 }
3465                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3466         }
3467
3468         be_async_mcc_disable(adapter);
3469
3470         /* Wait for all pending tx completions to arrive so that
3471          * all tx skbs are freed.
3472          */
3473         netif_tx_disable(netdev);
3474         be_tx_compl_clean(adapter);
3475
3476         be_rx_qs_destroy(adapter);
3477
3478         for_all_evt_queues(adapter, eqo, i) {
3479                 if (msix_enabled(adapter))
3480                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3481                 else
3482                         synchronize_irq(netdev->irq);
3483                 be_eq_clean(eqo);
3484         }
3485
3486         be_irq_unregister(adapter);
3487
3488         return 0;
3489 }
3490
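/* Allocate and create the RX queues: a non-RSS default RXQ only when one
 * is needed, and the rest as RSS rings. With more than one RX queue, the
 * RSS indirection table is filled with the ring ids in round-robin order
 * and a random hash key is programmed; otherwise RSS is disabled.
 */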
3491 static int be_rx_qs_create(struct be_adapter *adapter)
3492 {
3493         struct rss_info *rss = &adapter->rss_info;
3494         u8 rss_key[RSS_HASH_KEY_LEN];
3495         struct be_rx_obj *rxo;
3496         int rc, i, j;
3497
3498         for_all_rx_queues(adapter, rxo, i) {
3499                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3500                                     sizeof(struct be_eth_rx_d));
3501                 if (rc)
3502                         return rc;
3503         }
3504
3505         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3506                 rxo = default_rxo(adapter);
3507                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3508                                        rx_frag_size, adapter->if_handle,
3509                                        false, &rxo->rss_id);
3510                 if (rc)
3511                         return rc;
3512         }
3513
3514         for_all_rss_queues(adapter, rxo, i) {
3515                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3516                                        rx_frag_size, adapter->if_handle,
3517                                        true, &rxo->rss_id);
3518                 if (rc)
3519                         return rc;
3520         }
3521
3522         if (be_multi_rxq(adapter)) {
3523                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3524                         for_all_rss_queues(adapter, rxo, i) {
3525                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3526                                         break;
3527                                 rss->rsstable[j + i] = rxo->rss_id;
3528                                 rss->rss_queue[j + i] = i;
3529                         }
3530                 }
3531                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3532                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3533
3534                 if (!BEx_chip(adapter))
3535                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3536                                 RSS_ENABLE_UDP_IPV6;
3537
3538                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3539                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3540                                        RSS_INDIR_TABLE_LEN, rss_key);
3541                 if (rc) {
3542                         rss->rss_flags = RSS_ENABLE_NONE;
3543                         return rc;
3544                 }
3545
3546                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3547         } else {
3548                 /* Disable RSS, if only default RX Q is created */
3549                 rss->rss_flags = RSS_ENABLE_NONE;
3550         }
3551
3552
3553         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3554          * which is a queue empty condition
3555          */
3556         for_all_rx_queues(adapter, rxo, i)
3557                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3558
3559         return 0;
3560 }
3561
3562 static int be_enable_if_filters(struct be_adapter *adapter)
3563 {
3564         int status;
3565
3566         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3567         if (status)
3568                 return status;
3569
3570         /* For BE3 VFs, the PF programs the initial MAC address */
3571         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3572                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3573                                          adapter->if_handle,
3574                                          &adapter->pmac_id[0], 0);
3575                 if (status)
3576                         return status;
3577         }
3578
3579         if (adapter->vlans_added)
3580                 be_vid_config(adapter);
3581
3582         be_set_rx_mode(adapter->netdev);
3583
3584         return 0;
3585 }
3586
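/* ndo_open handler: create the RX queues, enable the interface filters,
 * register IRQs, arm all CQs and EQs, enable NAPI and async MCC
 * processing, and start the TX queues. On Skyhawk the stack is asked to
 * replay the UDP tunnel ports it knows about so offloads can be
 * re-programmed.
 */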
3587 static int be_open(struct net_device *netdev)
3588 {
3589         struct be_adapter *adapter = netdev_priv(netdev);
3590         struct be_eq_obj *eqo;
3591         struct be_rx_obj *rxo;
3592         struct be_tx_obj *txo;
3593         u8 link_status;
3594         int status, i;
3595
3596         status = be_rx_qs_create(adapter);
3597         if (status)
3598                 goto err;
3599
3600         status = be_enable_if_filters(adapter);
3601         if (status)
3602                 goto err;
3603
3604         status = be_irq_register(adapter);
3605         if (status)
3606                 goto err;
3607
3608         for_all_rx_queues(adapter, rxo, i)
3609                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3610
3611         for_all_tx_queues(adapter, txo, i)
3612                 be_cq_notify(adapter, txo->cq.id, true, 0);
3613
3614         be_async_mcc_enable(adapter);
3615
3616         for_all_evt_queues(adapter, eqo, i) {
3617                 napi_enable(&eqo->napi);
3618                 be_enable_busy_poll(eqo);
3619                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3620         }
3621         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3622
3623         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3624         if (!status)
3625                 be_link_status_update(adapter, link_status);
3626
3627         netif_tx_start_all_queues(netdev);
3628         if (skyhawk_chip(adapter))
3629                 udp_tunnel_get_rx_info(netdev);
3630
3631         return 0;
3632 err:
3633         be_close(adapter->netdev);
3634         return -EIO;
3635 }
3636
3637 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3638 {
3639         u32 addr;
3640
3641         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3642
3643         mac[5] = (u8)(addr & 0xFF);
3644         mac[4] = (u8)((addr >> 8) & 0xFF);
3645         mac[3] = (u8)((addr >> 16) & 0xFF);
3646         /* Use the OUI from the current MAC address */
3647         memcpy(mac, adapter->netdev->dev_addr, 3);
3648 }
3649
3650 /*
3651  * Generate a seed MAC address from the PF MAC Address using jhash.
3652  * MAC addresses for VFs are assigned incrementally starting from the seed.
3653  * These addresses are programmed in the ASIC by the PF and the VF driver
3654  * queries for the MAC address during its probe.
3655  */
3656 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3657 {
3658         u32 vf;
3659         int status = 0;
3660         u8 mac[ETH_ALEN];
3661         struct be_vf_cfg *vf_cfg;
3662
3663         be_vf_eth_addr_generate(adapter, mac);
3664
3665         for_all_vfs(adapter, vf_cfg, vf) {
3666                 if (BEx_chip(adapter))
3667                         status = be_cmd_pmac_add(adapter, mac,
3668                                                  vf_cfg->if_handle,
3669                                                  &vf_cfg->pmac_id, vf + 1);
3670                 else
3671                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3672                                                 vf + 1);
3673
3674                 if (status)
3675                         dev_err(&adapter->pdev->dev,
3676                                 "Mac address assignment failed for VF %d\n",
3677                                 vf);
3678                 else
3679                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3680
3681                 mac[5] += 1;
3682         }
3683         return status;
3684 }
3685
3686 static int be_vfs_mac_query(struct be_adapter *adapter)
3687 {
3688         int status, vf;
3689         u8 mac[ETH_ALEN];
3690         struct be_vf_cfg *vf_cfg;
3691
3692         for_all_vfs(adapter, vf_cfg, vf) {
3693                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3694                                                mac, vf_cfg->if_handle,
3695                                                false, vf + 1);
3696                 if (status)
3697                         return status;
3698                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3699         }
3700         return 0;
3701 }
3702
3703 static void be_vf_clear(struct be_adapter *adapter)
3704 {
3705         struct be_vf_cfg *vf_cfg;
3706         u32 vf;
3707
3708         if (pci_vfs_assigned(adapter->pdev)) {
3709                 dev_warn(&adapter->pdev->dev,
3710                          "VFs are assigned to VMs: not disabling VFs\n");
3711                 goto done;
3712         }
3713
3714         pci_disable_sriov(adapter->pdev);
3715
3716         for_all_vfs(adapter, vf_cfg, vf) {
3717                 if (BEx_chip(adapter))
3718                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3719                                         vf_cfg->pmac_id, vf + 1);
3720                 else
3721                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3722                                        vf + 1);
3723
3724                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3725         }
3726 done:
3727         kfree(adapter->vf_cfg);
3728         adapter->num_vfs = 0;
3729         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3730 }
3731
3732 static void be_clear_queues(struct be_adapter *adapter)
3733 {
3734         be_mcc_queues_destroy(adapter);
3735         be_rx_cqs_destroy(adapter);
3736         be_tx_queues_destroy(adapter);
3737         be_evt_queues_destroy(adapter);
3738 }
3739
3740 static void be_cancel_worker(struct be_adapter *adapter)
3741 {
3742         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3743                 cancel_delayed_work_sync(&adapter->work);
3744                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3745         }
3746 }
3747
3748 static void be_cancel_err_detection(struct be_adapter *adapter)
3749 {
3750         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3751                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3752                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3753         }
3754 }
3755
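/* Tear down VXLAN offload state: revert the interface to normal (non-tunnel)
 * parsing, clear the VXLAN port programmed in FW and drop the tunnel GSO
 * features from the netdev.
 */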
3756 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3757 {
3758         struct net_device *netdev = adapter->netdev;
3759
3760         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3761                 be_cmd_manage_iface(adapter, adapter->if_handle,
3762                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3763
3764         if (adapter->vxlan_port)
3765                 be_cmd_set_vxlan_port(adapter, 0);
3766
3767         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3768         adapter->vxlan_port = 0;
3769
3770         netdev->hw_enc_features = 0;
3771         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3772         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3773 }
3774
3775 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3776                                 struct be_resources *vft_res)
3777 {
3778         struct be_resources res = adapter->pool_res;
3779         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3780         struct be_resources res_mod = {0};
3781         u16 num_vf_qs = 1;
3782
3783         /* Distribute the queue resources among the PF and its VFs */
3784         if (num_vfs) {
3785                 /* Divide the rx queues evenly among the VFs and the PF, capped
3786                  * at VF-EQ-count. Any remainder queues belong to the PF.
3787                  */
3788                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3789                                 res.max_rss_qs / (num_vfs + 1));
3790
3791                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3792                  * RSS Tables per port. Provide RSS on VFs only if the number of
3793                  * VFs requested is less than its PF Pool's RSS Tables limit.
3794                  */
3795                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3796                         num_vf_qs = 1;
3797         }
3798
3799         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3800          * which are modifiable using SET_PROFILE_CONFIG cmd.
3801          */
3802         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3803                                   RESOURCE_MODIFIABLE, 0);
3804
3805         /* If RSS IFACE capability flags are modifiable for a VF, set the
3806          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3807          * more than 1 RSSQ is available for a VF.
3808          * Otherwise, provision only 1 queue pair for VF.
3809          */
3810         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3811                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3812                 if (num_vf_qs > 1) {
3813                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3814                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3815                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3816                 } else {
3817                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3818                                              BE_IF_FLAGS_DEFQ_RSS);
3819                 }
3820         } else {
3821                 num_vf_qs = 1;
3822         }
3823
3824         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3825                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3826                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3827         }
3828
3829         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3830         vft_res->max_rx_qs = num_vf_qs;
3831         vft_res->max_rss_qs = num_vf_qs;
3832         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3833         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3834
3835         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3836          * among the PF and its VFs, if the fields are changeable
3837          */
3838         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3839                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3840
3841         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3842                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3843
3844         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3845                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3846
3847         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3848                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3849 }
3850
3851 static int be_clear(struct be_adapter *adapter)
3852 {
3853         struct pci_dev *pdev = adapter->pdev;
3854         struct be_resources vft_res = {0};
3855
3856         be_cancel_worker(adapter);
3857
3858         if (sriov_enabled(adapter))
3859                 be_vf_clear(adapter);
3860
3861         /* Re-configure FW to distribute resources evenly across max-supported
3862          * number of VFs, only when VFs are not already enabled.
3863          */
3864         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3865             !pci_vfs_assigned(pdev)) {
3866                 be_calculate_vf_res(adapter,
3867                                     pci_sriov_get_totalvfs(pdev),
3868                                     &vft_res);
3869                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3870                                         pci_sriov_get_totalvfs(pdev),
3871                                         &vft_res);
3872         }
3873
3874         be_disable_vxlan_offloads(adapter);
3875         kfree(adapter->pmac_id);
3876         adapter->pmac_id = NULL;
3877
3878         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
3879
3880         be_clear_queues(adapter);
3881
3882         be_msix_disable(adapter);
3883         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3884         return 0;
3885 }
3886
3887 static int be_vfs_if_create(struct be_adapter *adapter)
3888 {
3889         struct be_resources res = {0};
3890         u32 cap_flags, en_flags, vf;
3891         struct be_vf_cfg *vf_cfg;
3892         int status;
3893
3894         /* If a FW profile exists, then cap_flags are updated */
3895         cap_flags = BE_VF_IF_EN_FLAGS;
3896
3897         for_all_vfs(adapter, vf_cfg, vf) {
3898                 if (!BE3_chip(adapter)) {
3899                         status = be_cmd_get_profile_config(adapter, &res, NULL,
3900                                                            ACTIVE_PROFILE_TYPE,
3901                                                            RESOURCE_LIMITS,
3902                                                            vf + 1);
3903                         if (!status) {
3904                                 cap_flags = res.if_cap_flags;
3905                                 /* Prevent VFs from enabling VLAN promiscuous
3906                                  * mode
3907                                  */
3908                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3909                         }
3910                 }
3911
3912                 /* PF should enable IF flags during proxy if_create call */
3913                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
3914                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
3915                                           &vf_cfg->if_handle, vf + 1);
3916                 if (status)
3917                         return status;
3918         }
3919
3920         return 0;
3921 }
3922
3923 static int be_vf_setup_init(struct be_adapter *adapter)
3924 {
3925         struct be_vf_cfg *vf_cfg;
3926         int vf;
3927
3928         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3929                                   GFP_KERNEL);
3930         if (!adapter->vf_cfg)
3931                 return -ENOMEM;
3932
3933         for_all_vfs(adapter, vf_cfg, vf) {
3934                 vf_cfg->if_handle = -1;
3935                 vf_cfg->pmac_id = -1;
3936         }
3937         return 0;
3938 }
3939
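/* Provision the VFs. If VFs were left enabled by a previous driver load,
 * their IFACE handles and MAC addresses are queried from FW; otherwise
 * IFACEs are created and MAC addresses assigned. Each VF is then granted
 * the FILTMGMT privilege where possible, given full bandwidth and has its
 * spoof-check setting read back; SR-IOV is then enabled if it wasn't
 * already.
 */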
3940 static int be_vf_setup(struct be_adapter *adapter)
3941 {
3942         struct device *dev = &adapter->pdev->dev;
3943         struct be_vf_cfg *vf_cfg;
3944         int status, old_vfs, vf;
3945         bool spoofchk;
3946
3947         old_vfs = pci_num_vf(adapter->pdev);
3948
3949         status = be_vf_setup_init(adapter);
3950         if (status)
3951                 goto err;
3952
3953         if (old_vfs) {
3954                 for_all_vfs(adapter, vf_cfg, vf) {
3955                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3956                         if (status)
3957                                 goto err;
3958                 }
3959
3960                 status = be_vfs_mac_query(adapter);
3961                 if (status)
3962                         goto err;
3963         } else {
3964                 status = be_vfs_if_create(adapter);
3965                 if (status)
3966                         goto err;
3967
3968                 status = be_vf_eth_addr_config(adapter);
3969                 if (status)
3970                         goto err;
3971         }
3972
3973         for_all_vfs(adapter, vf_cfg, vf) {
3974                 /* Allow VFs to program MAC/VLAN filters */
3975                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3976                                                   vf + 1);
3977                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3978                         status = be_cmd_set_fn_privileges(adapter,
3979                                                           vf_cfg->privileges |
3980                                                           BE_PRIV_FILTMGMT,
3981                                                           vf + 1);
3982                         if (!status) {
3983                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3984                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
3985                                          vf);
3986                         }
3987                 }
3988
3989                 /* Allow full available bandwidth */
3990                 if (!old_vfs)
3991                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
3992
3993                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
3994                                                vf_cfg->if_handle, NULL,
3995                                                &spoofchk);
3996                 if (!status)
3997                         vf_cfg->spoofchk = spoofchk;
3998
3999                 if (!old_vfs) {
4000                         be_cmd_enable_vf(adapter, vf + 1);
4001                         be_cmd_set_logical_link_config(adapter,
4002                                                        IFLA_VF_LINK_STATE_AUTO,
4003                                                        vf + 1);
4004                 }
4005         }
4006
4007         if (!old_vfs) {
4008                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4009                 if (status) {
4010                         dev_err(dev, "SRIOV enable failed\n");
4011                         adapter->num_vfs = 0;
4012                         goto err;
4013                 }
4014         }
4015
4016         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4017         return 0;
4018 err:
4019         dev_err(dev, "VF setup failed\n");
4020         be_vf_clear(adapter);
4021         return status;
4022 }
4023
4024 /* Converting function_mode bits on BE3 to SH mc_type enums */
4025
4026 static u8 be_convert_mc_type(u32 function_mode)
4027 {
4028         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4029                 return vNIC1;
4030         else if (function_mode & QNQ_MODE)
4031                 return FLEX10;
4032         else if (function_mode & VNIC_MODE)
4033                 return vNIC2;
4034         else if (function_mode & UMC_ENABLED)
4035                 return UMC;
4036         else
4037                 return MC_NONE;
4038 }
4039
4040 /* On BE2/BE3 FW does not suggest the supported limits */
4041 static void BEx_get_resources(struct be_adapter *adapter,
4042                               struct be_resources *res)
4043 {
4044         bool use_sriov = adapter->num_vfs ? 1 : 0;
4045
4046         if (be_physfn(adapter))
4047                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4048         else
4049                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4050
4051         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4052
4053         if (be_is_mc(adapter)) {
4054                 /* Assuming that there are 4 channels per port,
4055                  * when multi-channel is enabled
4056                  */
4057                 if (be_is_qnq_mode(adapter))
4058                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4059                 else
4060                         /* In a non-qnq multichannel mode, the pvid
4061                          * takes up one vlan entry
4062                          */
4063                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4064         } else {
4065                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4066         }
4067
4068         res->max_mcast_mac = BE_MAX_MC;
4069
4070         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4071          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4072          *    *only* if it is RSS-capable.
4073          */
4074         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4075             be_virtfn(adapter) ||
4076             (be_is_mc(adapter) &&
4077              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4078                 res->max_tx_qs = 1;
4079         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4080                 struct be_resources super_nic_res = {0};
4081
4082                 /* On a SuperNIC profile, the driver needs to use the
4083                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4084                  */
4085                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4086                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4087                                           0);
4088                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4089                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4090         } else {
4091                 res->max_tx_qs = BE3_MAX_TX_QS;
4092         }
4093
4094         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4095             !use_sriov && be_physfn(adapter))
4096                 res->max_rss_qs = (adapter->be3_native) ?
4097                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4098         res->max_rx_qs = res->max_rss_qs + 1;
4099
4100         if (be_physfn(adapter))
4101                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4102                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4103         else
4104                 res->max_evt_qs = 1;
4105
4106         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4107         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4108         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4109                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4110 }
4111
4112 static void be_setup_init(struct be_adapter *adapter)
4113 {
4114         adapter->vlan_prio_bmap = 0xff;
4115         adapter->phy.link_speed = -1;
4116         adapter->if_handle = -1;
4117         adapter->be3_native = false;
4118         adapter->if_flags = 0;
4119         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4120         if (be_physfn(adapter))
4121                 adapter->cmd_privileges = MAX_PRIVILEGES;
4122         else
4123                 adapter->cmd_privileges = MIN_PRIVILEGES;
4124 }
4125
4126 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4127  * However, this HW limitation is not exposed to the host via any SLI cmd.
4128  * As a result, in the case of SRIOV and in particular multi-partition configs
4129  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4130  * for distribution between the VFs. This self-imposed limit will determine the
4131  * number of VFs for which RSS can be enabled.
4132  */
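/* Illustration (made-up numbers): if the port exposes 15 RSS tables and
 * has 2 NIC PFs, rss_tables_on_port = 13. A PF whose pool allows 32 of
 * the port's 64 total VFs then gets 32 * 13 / 64 = 6 RSS tables.
 */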
4133 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4134 {
4135         struct be_port_resources port_res = {0};
4136         u8 rss_tables_on_port;
4137         u16 max_vfs = be_max_vfs(adapter);
4138
4139         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4140                                   RESOURCE_LIMITS, 0);
4141
4142         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4143
4144         /* Each PF Pool's RSS Tables limit =
4145          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4146          */
4147         adapter->pool_res.max_rss_tables =
4148                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4149 }
4150
4151 static int be_get_sriov_config(struct be_adapter *adapter)
4152 {
4153         struct be_resources res = {0};
4154         int max_vfs, old_vfs;
4155
4156         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4157                                   RESOURCE_LIMITS, 0);
4158
4159         /* Some old versions of BE3 FW don't report max_vfs value */
4160         if (BE3_chip(adapter) && !res.max_vfs) {
4161                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4162                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4163         }
4164
4165         adapter->pool_res = res;
4166
4167         /* If during previous unload of the driver, the VFs were not disabled,
4168          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4169          * Instead use the TotalVFs value stored in the pci-dev struct.
4170          */
4171         old_vfs = pci_num_vf(adapter->pdev);
4172         if (old_vfs) {
4173                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4174                          old_vfs);
4175
4176                 adapter->pool_res.max_vfs =
4177                         pci_sriov_get_totalvfs(adapter->pdev);
4178                 adapter->num_vfs = old_vfs;
4179         }
4180
4181         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4182                 be_calculate_pf_pool_rss_tables(adapter);
4183                 dev_info(&adapter->pdev->dev,
4184                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4185                          be_max_pf_pool_rss_tables(adapter));
4186         }
4187         return 0;
4188 }
4189
4190 static void be_alloc_sriov_res(struct be_adapter *adapter)
4191 {
4192         int old_vfs = pci_num_vf(adapter->pdev);
4193         struct be_resources vft_res = {0};
4194         int status;
4195
4196         be_get_sriov_config(adapter);
4197
4198         if (!old_vfs)
4199                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4200
4201         /* When the HW is in SRIOV capable configuration, the PF-pool
4202          * resources are given to PF during driver load, if there are no
4203          * old VFs. This facility is not available in BE3 FW.
4204          * Also, this is done by FW in Lancer chip.
4205          */
4206         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4207                 be_calculate_vf_res(adapter, 0, &vft_res);
4208                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4209                                                  &vft_res);
4210                 if (status)
4211                         dev_err(&adapter->pdev->dev,
4212                                 "Failed to optimize SRIOV resources\n");
4213         }
4214 }
4215
4216 static int be_get_resources(struct be_adapter *adapter)
4217 {
4218         struct device *dev = &adapter->pdev->dev;
4219         struct be_resources res = {0};
4220         int status;
4221
4222         if (BEx_chip(adapter)) {
4223                 BEx_get_resources(adapter, &res);
4224                 adapter->res = res;
4225         }
4226
4227         /* For Lancer, SH etc. read per-function resource limits from FW.
4228          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4229          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits
4230          */
4231         if (!BEx_chip(adapter)) {
4232                 status = be_cmd_get_func_config(adapter, &res);
4233                 if (status)
4234                         return status;
4235
4236                 /* If a default RXQ must be created, we'll use up one RSSQ */
4237                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4238                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4239                         res.max_rss_qs -= 1;
4240
4241                 /* If RoCE may be enabled stash away half the EQs for RoCE */
4242                 if (be_roce_supported(adapter))
4243                         res.max_evt_qs /= 2;
4244                 adapter->res = res;
4245         }
4246
4247         /* If FW supports RSS default queue, then skip creating non-RSS
4248          * queue for non-IP traffic.
4249          */
4250         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4251                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4252
4253         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4254                  be_max_txqs(adapter), be_max_rxqs(adapter),
4255                  be_max_rss(adapter), be_max_eqs(adapter),
4256                  be_max_vfs(adapter));
4257         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4258                  be_max_uc(adapter), be_max_mc(adapter),
4259                  be_max_vlans(adapter));
4260
4261         /* Sanitize cfg_num_qs based on HW and platform limits */
4262         adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(),
4263                                     be_max_qs(adapter));
4264         return 0;
4265 }
4266
4267 static int be_get_config(struct be_adapter *adapter)
4268 {
4269         int status, level;
4270         u16 profile_id;
4271
4272         status = be_cmd_get_cntl_attributes(adapter);
4273         if (status)
4274                 return status;
4275
4276         status = be_cmd_query_fw_cfg(adapter);
4277         if (status)
4278                 return status;
4279
4280         if (!lancer_chip(adapter) && be_physfn(adapter))
4281                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4282
4283         if (BEx_chip(adapter)) {
4284                 level = be_cmd_get_fw_log_level(adapter);
4285                 adapter->msg_enable =
4286                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4287         }
4288
4289         be_cmd_get_acpi_wol_cap(adapter);
4290         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4291         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4292
4293         be_cmd_query_port_name(adapter);
4294
4295         if (be_physfn(adapter)) {
4296                 status = be_cmd_get_active_profile(adapter, &profile_id);
4297                 if (!status)
4298                         dev_info(&adapter->pdev->dev,
4299                                  "Using profile 0x%x\n", profile_id);
4300         }
4301
4302         return 0;
4303 }
4304
4305 static int be_mac_setup(struct be_adapter *adapter)
4306 {
4307         u8 mac[ETH_ALEN];
4308         int status;
4309
4310         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4311                 status = be_cmd_get_perm_mac(adapter, mac);
4312                 if (status)
4313                         return status;
4314
4315                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4316                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4317         }
4318
4319         return 0;
4320 }
4321
4322 static void be_schedule_worker(struct be_adapter *adapter)
4323 {
4324         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4325         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4326 }
4327
4328 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4329 {
4330         schedule_delayed_work(&adapter->be_err_detection_work,
4331                               msecs_to_jiffies(delay));
4332         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4333 }
4334
4335 static int be_setup_queues(struct be_adapter *adapter)
4336 {
4337         struct net_device *netdev = adapter->netdev;
4338         int status;
4339
4340         status = be_evt_queues_create(adapter);
4341         if (status)
4342                 goto err;
4343
4344         status = be_tx_qs_create(adapter);
4345         if (status)
4346                 goto err;
4347
4348         status = be_rx_cqs_create(adapter);
4349         if (status)
4350                 goto err;
4351
4352         status = be_mcc_queues_create(adapter);
4353         if (status)
4354                 goto err;
4355
4356         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4357         if (status)
4358                 goto err;
4359
4360         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4361         if (status)
4362                 goto err;
4363
4364         return 0;
4365 err:
4366         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4367         return status;
4368 }
4369
4370 static int be_if_create(struct be_adapter *adapter)
4371 {
4372         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4373         u32 cap_flags = be_if_cap_flags(adapter);
4374         int status;
4375
4376         if (adapter->cfg_num_qs == 1)
4377                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4378
4379         en_flags &= cap_flags;
4380         /* will enable all the needed filter flags in be_open() */
4381         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4382                                   &adapter->if_handle, 0);
4383
4384         return status;
4385 }
4386
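/* Tear down and re-create the interface and all queues using the current
 * resource limits; used, for example, when an SR-IOV reconfiguration changes
 * the resources left for the PF (see be_pci_sriov_configure()).
 */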
4387 int be_update_queues(struct be_adapter *adapter)
4388 {
4389         struct net_device *netdev = adapter->netdev;
4390         int status;
4391
4392         if (netif_running(netdev))
4393                 be_close(netdev);
4394
4395         be_cancel_worker(adapter);
4396
4397         /* If any vectors have been shared with RoCE, we cannot re-program
4398          * the MSI-X table.
4399          */
4400         if (!adapter->num_msix_roce_vec)
4401                 be_msix_disable(adapter);
4402
4403         be_clear_queues(adapter);
4404         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4405         if (status)
4406                 return status;
4407
4408         if (!msix_enabled(adapter)) {
4409                 status = be_msix_enable(adapter);
4410                 if (status)
4411                         return status;
4412         }
4413
4414         status = be_if_create(adapter);
4415         if (status)
4416                 return status;
4417
4418         status = be_setup_queues(adapter);
4419         if (status)
4420                 return status;
4421
4422         be_schedule_worker(adapter);
4423
4424         if (netif_running(netdev))
4425                 status = be_open(netdev);
4426
4427         return status;
4428 }
4429
4430 static inline int fw_major_num(const char *fw_ver)
4431 {
4432         int fw_major = 0, i;
4433
4434         i = sscanf(fw_ver, "%d.", &fw_major);
4435         if (i != 1)
4436                 return 0;
4437
4438         return fw_major;
4439 }
4440
4441 /* If any VFs are already enabled, don't FLR the PF */
4442 static bool be_reset_required(struct be_adapter *adapter)
4443 {
4444         return pci_num_vf(adapter->pdev) ? false : true;
4445 }
4446
4447 /* Wait for the FW to be ready and perform the required initialization */
4448 static int be_func_init(struct be_adapter *adapter)
4449 {
4450         int status;
4451
4452         status = be_fw_wait_ready(adapter);
4453         if (status)
4454                 return status;
4455
4456         if (be_reset_required(adapter)) {
4457                 status = be_cmd_reset_function(adapter);
4458                 if (status)
4459                         return status;
4460
4461                 /* Wait for interrupts to quiesce after an FLR */
4462                 msleep(100);
4463
4464                 /* We can clear all errors when function reset succeeds */
4465                 be_clear_error(adapter, BE_CLEAR_ALL);
4466         }
4467
4468         /* Tell FW we're ready to fire cmds */
4469         status = be_cmd_fw_init(adapter);
4470         if (status)
4471                 return status;
4472
4473         /* Allow interrupts for other ULPs running on NIC function */
4474         be_intr_set(adapter, true);
4475
4476         return 0;
4477 }
4478
4479 static int be_setup(struct be_adapter *adapter)
4480 {
4481         struct device *dev = &adapter->pdev->dev;
4482         int status;
4483
4484         status = be_func_init(adapter);
4485         if (status)
4486                 return status;
4487
4488         be_setup_init(adapter);
4489
4490         if (!lancer_chip(adapter))
4491                 be_cmd_req_native_mode(adapter);
4492
4493         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4494          * for issuing profile-related cmds.
4495          */
4496         if (!BEx_chip(adapter)) {
4497                 status = be_cmd_get_func_config(adapter, NULL);
4498                 if (status)
4499                         return status;
4500         }
4501
4502         status = be_get_config(adapter);
4503         if (status)
4504                 goto err;
4505
4506         if (!BE2_chip(adapter) && be_physfn(adapter))
4507                 be_alloc_sriov_res(adapter);
4508
4509         status = be_get_resources(adapter);
4510         if (status)
4511                 goto err;
4512
4513         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4514                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4515         if (!adapter->pmac_id)
4516                 return -ENOMEM;
4517
4518         status = be_msix_enable(adapter);
4519         if (status)
4520                 goto err;
4521
4522         /* will enable all the needed filter flags in be_open() */
4523         status = be_if_create(adapter);
4524         if (status)
4525                 goto err;
4526
4527         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4528         rtnl_lock();
4529         status = be_setup_queues(adapter);
4530         rtnl_unlock();
4531         if (status)
4532                 goto err;
4533
4534         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4535
4536         status = be_mac_setup(adapter);
4537         if (status)
4538                 goto err;
4539
4540         be_cmd_get_fw_ver(adapter);
4541         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4542
4543         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4544                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4545                         adapter->fw_ver);
4546                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4547         }
4548
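        /* Try to program the requested pause settings; if the FW rejects them,
         * fall back to reporting whatever the FW currently has configured.
         */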
4549         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4550                                          adapter->rx_fc);
4551         if (status)
4552                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4553                                         &adapter->rx_fc);
4554
4555         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4556                  adapter->tx_fc, adapter->rx_fc);
4557
4558         if (be_physfn(adapter))
4559                 be_cmd_set_logical_link_config(adapter,
4560                                                IFLA_VF_LINK_STATE_AUTO, 0);
4561
4562         if (adapter->num_vfs)
4563                 be_vf_setup(adapter);
4564
4565         status = be_cmd_get_phy_info(adapter);
4566         if (!status && be_pause_supported(adapter))
4567                 adapter->phy.fc_autoneg = 1;
4568
4569         be_schedule_worker(adapter);
4570         adapter->flags |= BE_FLAGS_SETUP_DONE;
4571         return 0;
4572 err:
4573         be_clear(adapter);
4574         return status;
4575 }
4576
4577 #ifdef CONFIG_NET_POLL_CONTROLLER
4578 static void be_netpoll(struct net_device *netdev)
4579 {
4580         struct be_adapter *adapter = netdev_priv(netdev);
4581         struct be_eq_obj *eqo;
4582         int i;
4583
4584         for_all_evt_queues(adapter, eqo, i) {
4585                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4586                 napi_schedule(&eqo->napi);
4587         }
4588 }
4589 #endif
4590
4591 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4592 {
4593         const struct firmware *fw;
4594         int status;
4595
4596         if (!netif_running(adapter->netdev)) {
4597                 dev_err(&adapter->pdev->dev,
4598                         "Firmware load not allowed (interface is down)\n");
4599                 return -ENETDOWN;
4600         }
4601
4602         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4603         if (status)
4604                 goto fw_exit;
4605
4606         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4607
4608         if (lancer_chip(adapter))
4609                 status = lancer_fw_download(adapter, fw);
4610         else
4611                 status = be_fw_download(adapter, fw);
4612
4613         if (!status)
4614                 be_cmd_get_fw_ver(adapter);
4615
4616 fw_exit:
4617         release_firmware(fw);
4618         return status;
4619 }
4620
4621 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4622                                  u16 flags)
4623 {
4624         struct be_adapter *adapter = netdev_priv(dev);
4625         struct nlattr *attr, *br_spec;
4626         int rem;
4627         int status = 0;
4628         u16 mode = 0;
4629
4630         if (!sriov_enabled(adapter))
4631                 return -EOPNOTSUPP;
4632
4633         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4634         if (!br_spec)
4635                 return -EINVAL;
4636
4637         nla_for_each_nested(attr, br_spec, rem) {
4638                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4639                         continue;
4640
4641                 if (nla_len(attr) < sizeof(mode))
4642                         return -EINVAL;
4643
4644                 mode = nla_get_u16(attr);
4645                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4646                         return -EOPNOTSUPP;
4647
4648                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4649                         return -EINVAL;
4650
4651                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4652                                                adapter->if_handle,
4653                                                mode == BRIDGE_MODE_VEPA ?
4654                                                PORT_FWD_TYPE_VEPA :
4655                                                PORT_FWD_TYPE_VEB, 0);
4656                 if (status)
4657                         goto err;
4658
4659                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4660                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4661
4662                 return status;
4663         }
4664 err:
4665         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4666                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4667
4668         return status;
4669 }
4670
4671 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4672                                  struct net_device *dev, u32 filter_mask,
4673                                  int nlflags)
4674 {
4675         struct be_adapter *adapter = netdev_priv(dev);
4676         int status = 0;
4677         u8 hsw_mode;
4678
4679         /* BE and Lancer chips support VEB mode only */
4680         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4681                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4682                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4683                         return 0;
4684                 hsw_mode = PORT_FWD_TYPE_VEB;
4685         } else {
4686                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4687                                                adapter->if_handle, &hsw_mode,
4688                                                NULL);
4689                 if (status)
4690                         return 0;
4691
4692                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4693                         return 0;
4694         }
4695
4696         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4697                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4698                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4699                                        0, 0, nlflags, filter_mask, NULL);
4700 }
4701
4702 /* VxLAN offload Notes:
4703  *
4704  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4705  * distinguish various types of transports (VxLAN, GRE, NVGRE, ...). So, offload
4706  * is expected to work across all types of IP tunnels once exported. Skyhawk
4707  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4708  * offloads in hw_enc_features only when a VxLAN port is added. If other
4709  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads
4710  * for those other tunnels are unexported on the fly through ndo_features_check().
4711  *
4712  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4713  * adds more than one port, disable offloads and don't re-enable them
4714  * until after all the tunnels are removed.
4715  */
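/* Bookkeeping used below: vxlan_port_count tracks how many VxLAN ports the
 * stack currently has configured, while vxlan_port_aliases absorbs repeated
 * add/delete of the port that is already offloaded. Once a second, different
 * port is seen, offloads are disabled and are re-enabled only after the count
 * drops back to zero and a single port is added afresh.
 */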
4716 static void be_add_vxlan_port(struct net_device *netdev,
4717                               struct udp_tunnel_info *ti)
4718 {
4719         struct be_adapter *adapter = netdev_priv(netdev);
4720         struct device *dev = &adapter->pdev->dev;
4721         __be16 port = ti->port;
4722         int status;
4723
4724         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4725                 return;
4726
4727         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4728                 return;
4729
4730         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4731                 adapter->vxlan_port_aliases++;
4732                 return;
4733         }
4734
4735         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4736                 dev_info(dev,
4737                          "Only one UDP port supported for VxLAN offloads\n");
4738                 dev_info(dev, "Disabling VxLAN offloads\n");
4739                 adapter->vxlan_port_count++;
4740                 goto err;
4741         }
4742
4743         if (adapter->vxlan_port_count++ >= 1)
4744                 return;
4745
4746         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4747                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4748         if (status) {
4749                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4750                 goto err;
4751         }
4752
4753         status = be_cmd_set_vxlan_port(adapter, port);
4754         if (status) {
4755                 dev_warn(dev, "Failed to add VxLAN port\n");
4756                 goto err;
4757         }
4758         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4759         adapter->vxlan_port = port;
4760
4761         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4762                                    NETIF_F_TSO | NETIF_F_TSO6 |
4763                                    NETIF_F_GSO_UDP_TUNNEL;
4764         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4765         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4766
4767         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4768                  be16_to_cpu(port));
4769         return;
4770 err:
4771         be_disable_vxlan_offloads(adapter);
4772 }
4773
4774 static void be_del_vxlan_port(struct net_device *netdev,
4775                               struct udp_tunnel_info *ti)
4776 {
4777         struct be_adapter *adapter = netdev_priv(netdev);
4778         __be16 port = ti->port;
4779
4780         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4781                 return;
4782
4783         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4784                 return;
4785
4786         if (adapter->vxlan_port != port)
4787                 goto done;
4788
4789         if (adapter->vxlan_port_aliases) {
4790                 adapter->vxlan_port_aliases--;
4791                 return;
4792         }
4793
4794         be_disable_vxlan_offloads(adapter);
4795
4796         dev_info(&adapter->pdev->dev,
4797                  "Disabled VxLAN offloads for UDP port %d\n",
4798                  be16_to_cpu(port));
4799 done:
4800         adapter->vxlan_port_count--;
4801 }
4802
4803 static netdev_features_t be_features_check(struct sk_buff *skb,
4804                                            struct net_device *dev,
4805                                            netdev_features_t features)
4806 {
4807         struct be_adapter *adapter = netdev_priv(dev);
4808         u8 l4_hdr = 0;
4809
4810         /* The code below restricts offload features for some tunneled packets.
4811          * Offload features for normal (non-tunnel) packets are unchanged.
4812          */
4813         if (!skb->encapsulation ||
4814             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4815                 return features;
4816
4817         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4818          * should disable tunnel offload features if it's not a VxLAN packet,
4819          * as tunnel offloads have been enabled only for VxLAN. This is done so
4820          * that other tunneled traffic such as GRE keeps working while VxLAN
4821          * offloads are configured in Skyhawk-R.
4822          */
4823         switch (vlan_get_protocol(skb)) {
4824         case htons(ETH_P_IP):
4825                 l4_hdr = ip_hdr(skb)->protocol;
4826                 break;
4827         case htons(ETH_P_IPV6):
4828                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4829                 break;
4830         default:
4831                 return features;
4832         }
4833
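        /* For a well-formed VxLAN frame the gap between the outer transport
         * header and the inner Ethernet header is exactly the UDP header
         * (8 bytes) plus the VxLAN header (8 bytes). Anything else is treated
         * as a non-VxLAN tunnel here and loses checksum/GSO offloads.
         */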
4834         if (l4_hdr != IPPROTO_UDP ||
4835             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4836             skb->inner_protocol != htons(ETH_P_TEB) ||
4837             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4838             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4839                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4840
4841         return features;
4842 }
4843
4844 static int be_get_phys_port_id(struct net_device *dev,
4845                                struct netdev_phys_item_id *ppid)
4846 {
4847         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4848         struct be_adapter *adapter = netdev_priv(dev);
4849         u8 *id;
4850
4851         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4852                 return -ENOSPC;
4853
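        /* The exported id is one byte of (hba_port_num + 1) followed by the
         * controller serial number, copied one word at a time starting from
         * the highest-numbered word.
         */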
4854         ppid->id[0] = adapter->hba_port_num + 1;
4855         id = &ppid->id[1];
4856         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4857              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4858                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4859
4860         ppid->id_len = id_len;
4861
4862         return 0;
4863 }
4864
4865 static const struct net_device_ops be_netdev_ops = {
4866         .ndo_open               = be_open,
4867         .ndo_stop               = be_close,
4868         .ndo_start_xmit         = be_xmit,
4869         .ndo_set_rx_mode        = be_set_rx_mode,
4870         .ndo_set_mac_address    = be_mac_addr_set,
4871         .ndo_change_mtu         = be_change_mtu,
4872         .ndo_get_stats64        = be_get_stats64,
4873         .ndo_validate_addr      = eth_validate_addr,
4874         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4875         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4876         .ndo_set_vf_mac         = be_set_vf_mac,
4877         .ndo_set_vf_vlan        = be_set_vf_vlan,
4878         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4879         .ndo_get_vf_config      = be_get_vf_config,
4880         .ndo_set_vf_link_state  = be_set_vf_link_state,
4881         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4882 #ifdef CONFIG_NET_POLL_CONTROLLER
4883         .ndo_poll_controller    = be_netpoll,
4884 #endif
4885         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4886         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
4887 #ifdef CONFIG_NET_RX_BUSY_POLL
4888         .ndo_busy_poll          = be_busy_poll,
4889 #endif
4890         .ndo_udp_tunnel_add     = be_add_vxlan_port,
4891         .ndo_udp_tunnel_del     = be_del_vxlan_port,
4892         .ndo_features_check     = be_features_check,
4893         .ndo_get_phys_port_id   = be_get_phys_port_id,
4894 };
4895
4896 static void be_netdev_init(struct net_device *netdev)
4897 {
4898         struct be_adapter *adapter = netdev_priv(netdev);
4899
4900         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4901                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4902                 NETIF_F_HW_VLAN_CTAG_TX;
4903         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
4904                 netdev->hw_features |= NETIF_F_RXHASH;
4905
4906         netdev->features |= netdev->hw_features |
4907                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4908
4909         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4910                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4911
4912         netdev->priv_flags |= IFF_UNICAST_FLT;
4913
4914         netdev->flags |= IFF_MULTICAST;
4915
4916         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
4917
4918         netdev->netdev_ops = &be_netdev_ops;
4919
4920         netdev->ethtool_ops = &be_ethtool_ops;
4921 }
4922
4923 static void be_cleanup(struct be_adapter *adapter)
4924 {
4925         struct net_device *netdev = adapter->netdev;
4926
4927         rtnl_lock();
4928         netif_device_detach(netdev);
4929         if (netif_running(netdev))
4930                 be_close(netdev);
4931         rtnl_unlock();
4932
4933         be_clear(adapter);
4934 }
4935
4936 static int be_resume(struct be_adapter *adapter)
4937 {
4938         struct net_device *netdev = adapter->netdev;
4939         int status;
4940
4941         status = be_setup(adapter);
4942         if (status)
4943                 return status;
4944
4945         rtnl_lock();
4946         if (netif_running(netdev))
4947                 status = be_open(netdev);
4948         rtnl_unlock();
4949
4950         if (status)
4951                 return status;
4952
4953         netif_device_attach(netdev);
4954
4955         return 0;
4956 }
4957
4958 static int be_err_recover(struct be_adapter *adapter)
4959 {
4960         int status;
4961
4962         /* Error recovery is supported only on Lancer as of now */
4963         if (!lancer_chip(adapter))
4964                 return -EIO;
4965
4966         /* Wait for adapter to reach quiescent state before
4967          * destroying queues
4968          */
4969         status = be_fw_wait_ready(adapter);
4970         if (status)
4971                 goto err;
4972
4973         be_cleanup(adapter);
4974
4975         status = be_resume(adapter);
4976         if (status)
4977                 goto err;
4978
4979         return 0;
4980 err:
4981         return status;
4982 }
4983
4984 static void be_err_detection_task(struct work_struct *work)
4985 {
4986         struct be_adapter *adapter =
4987                                 container_of(work, struct be_adapter,
4988                                              be_err_detection_work.work);
4989         struct device *dev = &adapter->pdev->dev;
4990         int recovery_status;
4991         int delay = ERR_DETECTION_DELAY;
4992
4993         be_detect_error(adapter);
4994
4995         if (be_check_error(adapter, BE_ERROR_HW))
4996                 recovery_status = be_err_recover(adapter);
4997         else
4998                 goto reschedule_task;
4999
5000         if (!recovery_status) {
5001                 adapter->recovery_retries = 0;
5002                 dev_info(dev, "Adapter recovery successful\n");
5003                 goto reschedule_task;
5004         } else if (be_virtfn(adapter)) {
5005                 /* For VFs, check every second whether the PF has
5006                  * allocated resources.
5007                  */
5008                 dev_err(dev, "Re-trying adapter recovery\n");
5009                 goto reschedule_task;
5010         } else if (adapter->recovery_retries++ <
5011                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5012                 /* In case of another error during recovery, it takes 30 sec
5013                  * for the adapter to come out of error. Retry error recovery
5014                  * after this time interval.
5015                  */
5016                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5017                 delay = ERR_RECOVERY_RETRY_DELAY;
5018                 goto reschedule_task;
5019         } else {
5020                 dev_err(dev, "Adapter recovery failed\n");
5021         }
5022
5023         return;
5024 reschedule_task:
5025         be_schedule_err_detection(adapter, delay);
5026 }
5027
5028 static void be_log_sfp_info(struct be_adapter *adapter)
5029 {
5030         int status;
5031
5032         status = be_cmd_query_sfp_info(adapter);
5033         if (!status) {
5034                 dev_err(&adapter->pdev->dev,
5035                         "Port %c: %s Vendor: %s part no: %s",
5036                         adapter->port_name,
5037                         be_misconfig_evt_port_state[adapter->phy_state],
5038                         adapter->phy.vendor_name,
5039                         adapter->phy.vendor_pn);
5040         }
5041         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5042 }
5043
5044 static void be_worker(struct work_struct *work)
5045 {
5046         struct be_adapter *adapter =
5047                 container_of(work, struct be_adapter, work.work);
5048         struct be_rx_obj *rxo;
5049         int i;
5050
5051         /* when interrupts are not yet enabled, just reap any pending
5052          * mcc completions
5053          */
5054         if (!netif_running(adapter->netdev)) {
5055                 local_bh_disable();
5056                 be_process_mcc(adapter);
5057                 local_bh_enable();
5058                 goto reschedule;
5059         }
5060
5061         if (!adapter->stats_cmd_sent) {
5062                 if (lancer_chip(adapter))
5063                         lancer_cmd_get_pport_stats(adapter,
5064                                                    &adapter->stats_cmd);
5065                 else
5066                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5067         }
5068
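        /* The worker reschedules itself every second, so with be_get_temp_freq
         * set to 64 the die temperature is refreshed roughly once a minute on
         * the PF.
         */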
5069         if (be_physfn(adapter) &&
5070             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5071                 be_cmd_get_die_temperature(adapter);
5072
5073         for_all_rx_queues(adapter, rxo, i) {
5074                 /* Replenish RX-queues starved due to memory
5075                  * allocation failures.
5076                  */
5077                 if (rxo->rx_post_starved)
5078                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5079         }
5080
5081         /* EQ-delay update for Skyhawk is done while notifying EQ */
5082         if (!skyhawk_chip(adapter))
5083                 be_eqd_update(adapter, false);
5084
5085         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5086                 be_log_sfp_info(adapter);
5087
5088 reschedule:
5089         adapter->work_counter++;
5090         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5091 }
5092
5093 static void be_unmap_pci_bars(struct be_adapter *adapter)
5094 {
5095         if (adapter->csr)
5096                 pci_iounmap(adapter->pdev, adapter->csr);
5097         if (adapter->db)
5098                 pci_iounmap(adapter->pdev, adapter->db);
5099         if (adapter->pcicfg && adapter->pcicfg_mapped)
5100                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5101 }
5102
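/* Doorbell registers live in BAR 0 on Lancer and on VFs; on other chips the
 * PF doorbells are in BAR 4.
 */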
5103 static int db_bar(struct be_adapter *adapter)
5104 {
5105         if (lancer_chip(adapter) || be_virtfn(adapter))
5106                 return 0;
5107         else
5108                 return 4;
5109 }
5110
5111 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5112 {
5113         if (skyhawk_chip(adapter)) {
5114                 adapter->roce_db.size = 4096;
5115                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5116                                                               db_bar(adapter));
5117                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5118                                                                db_bar(adapter));
5119         }
5120         return 0;
5121 }
5122
5123 static int be_map_pci_bars(struct be_adapter *adapter)
5124 {
5125         struct pci_dev *pdev = adapter->pdev;
5126         u8 __iomem *addr;
5127         u32 sli_intf;
5128
5129         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5130         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5131                                 SLI_INTF_FAMILY_SHIFT;
5132         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5133
5134         if (BEx_chip(adapter) && be_physfn(adapter)) {
5135                 adapter->csr = pci_iomap(pdev, 2, 0);
5136                 if (!adapter->csr)
5137                         return -ENOMEM;
5138         }
5139
5140         addr = pci_iomap(pdev, db_bar(adapter), 0);
5141         if (!addr)
5142                 goto pci_map_err;
5143         adapter->db = addr;
5144
5145         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5146                 if (be_physfn(adapter)) {
5147                         /* PCICFG is the 2nd BAR in BE2 */
5148                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5149                         if (!addr)
5150                                 goto pci_map_err;
5151                         adapter->pcicfg = addr;
5152                         adapter->pcicfg_mapped = true;
5153                 } else {
5154                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5155                         adapter->pcicfg_mapped = false;
5156                 }
5157         }
5158
5159         be_roce_map_pci_bars(adapter);
5160         return 0;
5161
5162 pci_map_err:
5163         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5164         be_unmap_pci_bars(adapter);
5165         return -ENOMEM;
5166 }
5167
5168 static void be_drv_cleanup(struct be_adapter *adapter)
5169 {
5170         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5171         struct device *dev = &adapter->pdev->dev;
5172
5173         if (mem->va)
5174                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5175
5176         mem = &adapter->rx_filter;
5177         if (mem->va)
5178                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5179
5180         mem = &adapter->stats_cmd;
5181         if (mem->va)
5182                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5183 }
5184
5185 /* Allocate and initialize various fields in be_adapter struct */
5186 static int be_drv_init(struct be_adapter *adapter)
5187 {
5188         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5189         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5190         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5191         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5192         struct device *dev = &adapter->pdev->dev;
5193         int status = 0;
5194
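        /* 16 extra bytes are allocated so that both the CPU and DMA addresses
         * of the mailbox can be rounded up to a 16-byte boundary with
         * PTR_ALIGN() below.
         */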
5195         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5196         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5197                                                  &mbox_mem_alloc->dma,
5198                                                  GFP_KERNEL);
5199         if (!mbox_mem_alloc->va)
5200                 return -ENOMEM;
5201
5202         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5203         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5204         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5205
5206         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5207         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5208                                             &rx_filter->dma, GFP_KERNEL);
5209         if (!rx_filter->va) {
5210                 status = -ENOMEM;
5211                 goto free_mbox;
5212         }
5213
5214         if (lancer_chip(adapter))
5215                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5216         else if (BE2_chip(adapter))
5217                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5218         else if (BE3_chip(adapter))
5219                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5220         else
5221                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5222         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5223                                             &stats_cmd->dma, GFP_KERNEL);
5224         if (!stats_cmd->va) {
5225                 status = -ENOMEM;
5226                 goto free_rx_filter;
5227         }
5228
5229         mutex_init(&adapter->mbox_lock);
5230         spin_lock_init(&adapter->mcc_lock);
5231         spin_lock_init(&adapter->mcc_cq_lock);
5232         init_completion(&adapter->et_cmd_compl);
5233
5234         pci_save_state(adapter->pdev);
5235
5236         INIT_DELAYED_WORK(&adapter->work, be_worker);
5237         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5238                           be_err_detection_task);
5239
5240         adapter->rx_fc = true;
5241         adapter->tx_fc = true;
5242
5243         /* Must be a power of 2 or else MODULO will BUG_ON */
5244         adapter->be_get_temp_freq = 64;
5245
5246         return 0;
5247
5248 free_rx_filter:
5249         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5250 free_mbox:
5251         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5252                           mbox_mem_alloc->dma);
5253         return status;
5254 }
5255
5256 static void be_remove(struct pci_dev *pdev)
5257 {
5258         struct be_adapter *adapter = pci_get_drvdata(pdev);
5259
5260         if (!adapter)
5261                 return;
5262
5263         be_roce_dev_remove(adapter);
5264         be_intr_set(adapter, false);
5265
5266         be_cancel_err_detection(adapter);
5267
5268         unregister_netdev(adapter->netdev);
5269
5270         be_clear(adapter);
5271
5272         /* tell fw we're done with firing cmds */
5273         be_cmd_fw_clean(adapter);
5274
5275         be_unmap_pci_bars(adapter);
5276         be_drv_cleanup(adapter);
5277
5278         pci_disable_pcie_error_reporting(pdev);
5279
5280         pci_release_regions(pdev);
5281         pci_disable_device(pdev);
5282
5283         free_netdev(adapter->netdev);
5284 }
5285
5286 static ssize_t be_hwmon_show_temp(struct device *dev,
5287                                   struct device_attribute *dev_attr,
5288                                   char *buf)
5289 {
5290         struct be_adapter *adapter = dev_get_drvdata(dev);
5291
5292         /* Unit: millidegree Celsius */
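        /* e.g. a stored die temperature of 55 (degrees C) is reported as 55000 */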
5293         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5294                 return -EIO;
5295         else
5296                 return sprintf(buf, "%u\n",
5297                                adapter->hwmon_info.be_on_die_temp * 1000);
5298 }
5299
5300 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5301                           be_hwmon_show_temp, NULL, 1);
5302
5303 static struct attribute *be_hwmon_attrs[] = {
5304         &sensor_dev_attr_temp1_input.dev_attr.attr,
5305         NULL
5306 };
5307
5308 ATTRIBUTE_GROUPS(be_hwmon);
5309
5310 static char *mc_name(struct be_adapter *adapter)
5311 {
5312         char *str = ""; /* default */
5313
5314         switch (adapter->mc_type) {
5315         case UMC:
5316                 str = "UMC";
5317                 break;
5318         case FLEX10:
5319                 str = "FLEX10";
5320                 break;
5321         case vNIC1:
5322                 str = "vNIC-1";
5323                 break;
5324         case nPAR:
5325                 str = "nPAR";
5326                 break;
5327         case UFP:
5328                 str = "UFP";
5329                 break;
5330         case vNIC2:
5331                 str = "vNIC-2";
5332                 break;
5333         default:
5334                 str = "";
5335         }
5336
5337         return str;
5338 }
5339
5340 static inline char *func_name(struct be_adapter *adapter)
5341 {
5342         return be_physfn(adapter) ? "PF" : "VF";
5343 }
5344
5345 static inline char *nic_name(struct pci_dev *pdev)
5346 {
5347         switch (pdev->device) {
5348         case OC_DEVICE_ID1:
5349                 return OC_NAME;
5350         case OC_DEVICE_ID2:
5351                 return OC_NAME_BE;
5352         case OC_DEVICE_ID3:
5353         case OC_DEVICE_ID4:
5354                 return OC_NAME_LANCER;
5355         case BE_DEVICE_ID2:
5356                 return BE3_NAME;
5357         case OC_DEVICE_ID5:
5358         case OC_DEVICE_ID6:
5359                 return OC_NAME_SH;
5360         default:
5361                 return BE_NAME;
5362         }
5363 }
5364
5365 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5366 {
5367         struct be_adapter *adapter;
5368         struct net_device *netdev;
5369         int status = 0;
5370
5371         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5372
5373         status = pci_enable_device(pdev);
5374         if (status)
5375                 goto do_none;
5376
5377         status = pci_request_regions(pdev, DRV_NAME);
5378         if (status)
5379                 goto disable_dev;
5380         pci_set_master(pdev);
5381
5382         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5383         if (!netdev) {
5384                 status = -ENOMEM;
5385                 goto rel_reg;
5386         }
5387         adapter = netdev_priv(netdev);
5388         adapter->pdev = pdev;
5389         pci_set_drvdata(pdev, adapter);
5390         adapter->netdev = netdev;
5391         SET_NETDEV_DEV(netdev, &pdev->dev);
5392
5393         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5394         if (!status) {
5395                 netdev->features |= NETIF_F_HIGHDMA;
5396         } else {
5397                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5398                 if (status) {
5399                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5400                         goto free_netdev;
5401                 }
5402         }
5403
5404         status = pci_enable_pcie_error_reporting(pdev);
5405         if (!status)
5406                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5407
5408         status = be_map_pci_bars(adapter);
5409         if (status)
5410                 goto free_netdev;
5411
5412         status = be_drv_init(adapter);
5413         if (status)
5414                 goto unmap_bars;
5415
5416         status = be_setup(adapter);
5417         if (status)
5418                 goto drv_cleanup;
5419
5420         be_netdev_init(netdev);
5421         status = register_netdev(netdev);
5422         if (status != 0)
5423                 goto unsetup;
5424
5425         be_roce_dev_add(adapter);
5426
5427         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5428
5429         /* On-die temperature is not supported for VFs. */
5430         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5431                 adapter->hwmon_info.hwmon_dev =
5432                         devm_hwmon_device_register_with_groups(&pdev->dev,
5433                                                                DRV_NAME,
5434                                                                adapter,
5435                                                                be_hwmon_groups);
5436                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5437         }
5438
5439         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5440                  func_name(adapter), mc_name(adapter), adapter->port_name);
5441
5442         return 0;
5443
5444 unsetup:
5445         be_clear(adapter);
5446 drv_cleanup:
5447         be_drv_cleanup(adapter);
5448 unmap_bars:
5449         be_unmap_pci_bars(adapter);
5450 free_netdev:
5451         free_netdev(netdev);
5452 rel_reg:
5453         pci_release_regions(pdev);
5454 disable_dev:
5455         pci_disable_device(pdev);
5456 do_none:
5457         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5458         return status;
5459 }
5460
5461 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5462 {
5463         struct be_adapter *adapter = pci_get_drvdata(pdev);
5464
5465         be_intr_set(adapter, false);
5466         be_cancel_err_detection(adapter);
5467
5468         be_cleanup(adapter);
5469
5470         pci_save_state(pdev);
5471         pci_disable_device(pdev);
5472         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5473         return 0;
5474 }
5475
5476 static int be_pci_resume(struct pci_dev *pdev)
5477 {
5478         struct be_adapter *adapter = pci_get_drvdata(pdev);
5479         int status = 0;
5480
5481         status = pci_enable_device(pdev);
5482         if (status)
5483                 return status;
5484
5485         pci_restore_state(pdev);
5486
5487         status = be_resume(adapter);
5488         if (status)
5489                 return status;
5490
5491         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5492
5493         return 0;
5494 }
5495
5496 /*
5497  * An FLR will stop BE from DMAing any data.
5498  */
5499 static void be_shutdown(struct pci_dev *pdev)
5500 {
5501         struct be_adapter *adapter = pci_get_drvdata(pdev);
5502
5503         if (!adapter)
5504                 return;
5505
5506         be_roce_dev_shutdown(adapter);
5507         cancel_delayed_work_sync(&adapter->work);
5508         be_cancel_err_detection(adapter);
5509
5510         netif_device_detach(adapter->netdev);
5511
5512         be_cmd_reset_function(adapter);
5513
5514         pci_disable_device(pdev);
5515 }
5516
5517 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5518                                             pci_channel_state_t state)
5519 {
5520         struct be_adapter *adapter = pci_get_drvdata(pdev);
5521
5522         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5523
5524         be_roce_dev_remove(adapter);
5525
5526         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5527                 be_set_error(adapter, BE_ERROR_EEH);
5528
5529                 be_cancel_err_detection(adapter);
5530
5531                 be_cleanup(adapter);
5532         }
5533
5534         if (state == pci_channel_io_perm_failure)
5535                 return PCI_ERS_RESULT_DISCONNECT;
5536
5537         pci_disable_device(pdev);
5538
5539         /* The error could cause the FW to trigger a flash debug dump.
5540          * Resetting the card while a flash dump is in progress
5541          * can cause it not to recover; wait for it to finish.
5542          * Wait only on the first function, as this is needed only once
5543          * per adapter.
5544          */
5545         if (pdev->devfn == 0)
5546                 ssleep(30);
5547
5548         return PCI_ERS_RESULT_NEED_RESET;
5549 }
5550
5551 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5552 {
5553         struct be_adapter *adapter = pci_get_drvdata(pdev);
5554         int status;
5555
5556         dev_info(&adapter->pdev->dev, "EEH reset\n");
5557
5558         status = pci_enable_device(pdev);
5559         if (status)
5560                 return PCI_ERS_RESULT_DISCONNECT;
5561
5562         pci_set_master(pdev);
5563         pci_restore_state(pdev);
5564
5565         /* Check if card is ok and fw is ready */
5566         dev_info(&adapter->pdev->dev,
5567                  "Waiting for FW to be ready after EEH reset\n");
5568         status = be_fw_wait_ready(adapter);
5569         if (status)
5570                 return PCI_ERS_RESULT_DISCONNECT;
5571
5572         pci_cleanup_aer_uncorrect_error_status(pdev);
5573         be_clear_error(adapter, BE_CLEAR_ALL);
5574         return PCI_ERS_RESULT_RECOVERED;
5575 }
5576
5577 static void be_eeh_resume(struct pci_dev *pdev)
5578 {
5579         int status = 0;
5580         struct be_adapter *adapter = pci_get_drvdata(pdev);
5581
5582         dev_info(&adapter->pdev->dev, "EEH resume\n");
5583
5584         pci_save_state(pdev);
5585
5586         status = be_resume(adapter);
5587         if (status)
5588                 goto err;
5589
5590         be_roce_dev_add(adapter);
5591
5592         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5593         return;
5594 err:
5595         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5596 }
5597
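/* .sriov_configure PCI hook: invoked by the PCI core when the admin writes to
 * the PF's sriov_numvfs sysfs attribute. num_vfs == 0 requests that all VFs be
 * disabled.
 */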
5598 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5599 {
5600         struct be_adapter *adapter = pci_get_drvdata(pdev);
5601         struct be_resources vft_res = {0};
5602         int status;
5603
5604         if (!num_vfs)
5605                 be_vf_clear(adapter);
5606
5607         adapter->num_vfs = num_vfs;
5608
5609         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5610                 dev_warn(&pdev->dev,
5611                          "Cannot disable VFs while they are assigned\n");
5612                 return -EBUSY;
5613         }
5614
5615         /* When the HW is in an SRIOV-capable configuration, the PF-pool resources
5616          * are equally distributed across the max number of VFs. The user may
5617          * request that only a subset of the max VFs be enabled.
5618          * Based on num_vfs, redistribute the resources across num_vfs so that
5619          * each VF gets a larger share of the resources.
5620          * This facility is not available in BE3 FW.
5621          * On Lancer chips this is done by the FW itself.
5622          */
5623         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5624                 be_calculate_vf_res(adapter, adapter->num_vfs,
5625                                     &vft_res);
5626                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5627                                                  adapter->num_vfs, &vft_res);
5628                 if (status)
5629                         dev_err(&pdev->dev,
5630                                 "Failed to optimize SR-IOV resources\n");
5631         }
5632
5633         status = be_get_resources(adapter);
5634         if (status)
5635                 return be_cmd_status(status);
5636
5637         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5638         rtnl_lock();
5639         status = be_update_queues(adapter);
5640         rtnl_unlock();
5641         if (status)
5642                 return be_cmd_status(status);
5643
5644         if (adapter->num_vfs)
5645                 status = be_vf_setup(adapter);
5646
5647         if (!status)
5648                 return adapter->num_vfs;
5649
5650         return 0;
5651 }
5652
5653 static const struct pci_error_handlers be_eeh_handlers = {
5654         .error_detected = be_eeh_err_detected,
5655         .slot_reset = be_eeh_reset,
5656         .resume = be_eeh_resume,
5657 };
5658
5659 static struct pci_driver be_driver = {
5660         .name = DRV_NAME,
5661         .id_table = be_dev_ids,
5662         .probe = be_probe,
5663         .remove = be_remove,
5664         .suspend = be_suspend,
5665         .resume = be_pci_resume,
5666         .shutdown = be_shutdown,
5667         .sriov_configure = be_pci_sriov_configure,
5668         .err_handler = &be_eeh_handlers
5669 };
5670
5671 static int __init be_init_module(void)
5672 {
5673         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5674             rx_frag_size != 2048) {
5675                 printk(KERN_WARNING DRV_NAME
5676                         " : Module param rx_frag_size must be 2048/4096/8192."
5677                         " Using 2048\n");
5678                 rx_frag_size = 2048;
5679         }
5680
5681         if (num_vfs > 0) {
5682                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5683                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5684         }
5685
5686         return pci_register_driver(&be_driver);
5687 }
5688 module_init(be_init_module);
5689
5690 static void __exit be_exit_module(void)
5691 {
5692         pci_unregister_driver(&be_driver);
5693 }
5694 module_exit(be_exit_module);