i40e/i40evf: Break up xmit_descriptor_count from maybe_stop_tx
[cascardo/linux.git] / drivers / net / ethernet / intel / i40e / i40e_txrx.c
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2016 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33                                 u32 td_tag)
34 {
35         return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36                            ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37                            ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38                            ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39                            ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: Packet data that will be filter parameters
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52                              struct i40e_pf *pf, bool add)
53 {
54         struct i40e_filter_program_desc *fdir_desc;
55         struct i40e_tx_buffer *tx_buf, *first;
56         struct i40e_tx_desc *tx_desc;
57         struct i40e_ring *tx_ring;
58         unsigned int fpt, dcc;
59         struct i40e_vsi *vsi;
60         struct device *dev;
61         dma_addr_t dma;
62         u32 td_cmd = 0;
63         u16 delay = 0;
64         u16 i;
65
66         /* find existing FDIR VSI */
67         vsi = NULL;
68         for (i = 0; i < pf->num_alloc_vsi; i++)
69                 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70                         vsi = pf->vsi[i];
71         if (!vsi)
72                 return -ENOENT;
73
74         tx_ring = vsi->tx_rings[0];
75         dev = tx_ring->dev;
76
77         /* we need two descriptors to add/del a filter and we can wait */
78         do {
79                 if (I40E_DESC_UNUSED(tx_ring) > 1)
80                         break;
81                 msleep_interruptible(1);
82                 delay++;
83         } while (delay < I40E_FD_CLEAN_DELAY);
84
85         if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86                 return -EAGAIN;
87
88         dma = dma_map_single(dev, raw_packet,
89                              I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90         if (dma_mapping_error(dev, dma))
91                 goto dma_fail;
92
93         /* grab the next descriptor */
94         i = tx_ring->next_to_use;
95         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96         first = &tx_ring->tx_bi[i];
97         memset(first, 0, sizeof(struct i40e_tx_buffer));
98
99         tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100
101         fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102               I40E_TXD_FLTR_QW0_QINDEX_MASK;
103
104         fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105                I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106
107         fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108                I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109
110         /* Use LAN VSI Id if not programmed by user */
111         if (fdir_data->dest_vsi == 0)
112                 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113                        I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114         else
115                 fpt |= ((u32)fdir_data->dest_vsi <<
116                         I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117                        I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118
119         dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120
121         if (add)
122                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124         else
125                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127
128         dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129                I40E_TXD_FLTR_QW1_DEST_MASK;
130
131         dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132                I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133
134         if (fdir_data->cnt_index != 0) {
135                 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136                 dcc |= ((u32)fdir_data->cnt_index <<
137                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139         }
140
141         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142         fdir_desc->rsvd = cpu_to_le32(0);
143         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144         fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145
146         /* Now program a dummy descriptor */
147         i = tx_ring->next_to_use;
148         tx_desc = I40E_TX_DESC(tx_ring, i);
149         tx_buf = &tx_ring->tx_bi[i];
150
151         tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152
153         memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154
155         /* record length, and DMA address */
156         dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157         dma_unmap_addr_set(tx_buf, dma, dma);
158
159         tx_desc->buffer_addr = cpu_to_le64(dma);
160         td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161
162         tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163         tx_buf->raw_buf = (void *)raw_packet;
164
165         tx_desc->cmd_type_offset_bsz =
166                 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167
168         /* Force memory writes to complete before letting h/w
169          * know there are new descriptors to fetch.
170          */
171         wmb();
172
173         /* Mark the data descriptor to be watched */
174         first->next_to_watch = tx_desc;
175
176         writel(tx_ring->next_to_use, tx_ring->tail);
177         return 0;
178
179 dma_fail:
180         return -1;
181 }
182
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
185 /**
186  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187  * @vsi: pointer to the targeted VSI
188  * @fd_data: the flow director data required for the FDir descriptor
189  * @add: true adds a filter, false removes it
190  *
191  * Returns 0 if the filters were successfully added or removed
192  **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194                                    struct i40e_fdir_filter *fd_data,
195                                    bool add)
196 {
197         struct i40e_pf *pf = vsi->back;
198         struct udphdr *udp;
199         struct iphdr *ip;
200         bool err = false;
201         u8 *raw_packet;
202         int ret;
203         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204                 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206
207         raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208         if (!raw_packet)
209                 return -ENOMEM;
210         memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211
212         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213         udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214               + sizeof(struct iphdr));
215
216         ip->daddr = fd_data->dst_ip[0];
217         udp->dest = fd_data->dst_port;
218         ip->saddr = fd_data->src_ip[0];
219         udp->source = fd_data->src_port;
220
221         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223         if (ret) {
224                 dev_info(&pf->pdev->dev,
225                          "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226                          fd_data->pctype, fd_data->fd_id, ret);
227                 err = true;
228         } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229                 if (add)
230                         dev_info(&pf->pdev->dev,
231                                  "Filter OK for PCTYPE %d loc = %d\n",
232                                  fd_data->pctype, fd_data->fd_id);
233                 else
234                         dev_info(&pf->pdev->dev,
235                                  "Filter deleted for PCTYPE %d loc = %d\n",
236                                  fd_data->pctype, fd_data->fd_id);
237         }
238         if (err)
239                 kfree(raw_packet);
240
241         return err ? -EOPNOTSUPP : 0;
242 }
243
244 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
245 /**
246  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247  * @vsi: pointer to the targeted VSI
248  * @fd_data: the flow director data required for the FDir descriptor
249  * @add: true adds a filter, false removes it
250  *
251  * Returns 0 if the filters were successfully added or removed
252  **/
253 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254                                    struct i40e_fdir_filter *fd_data,
255                                    bool add)
256 {
257         struct i40e_pf *pf = vsi->back;
258         struct tcphdr *tcp;
259         struct iphdr *ip;
260         bool err = false;
261         u8 *raw_packet;
262         int ret;
263         /* Dummy packet */
264         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265                 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267                 0x0, 0x72, 0, 0, 0, 0};
268
269         raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270         if (!raw_packet)
271                 return -ENOMEM;
272         memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273
274         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275         tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276               + sizeof(struct iphdr));
277
278         ip->daddr = fd_data->dst_ip[0];
279         tcp->dest = fd_data->dst_port;
280         ip->saddr = fd_data->src_ip[0];
281         tcp->source = fd_data->src_port;
282
283         if (add) {
284                 pf->fd_tcp_rule++;
285                 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
286                         if (I40E_DEBUG_FD & pf->hw.debug_mask)
287                                 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
288                         pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
289                 }
290         } else {
291                 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
292                                   (pf->fd_tcp_rule - 1) : 0;
293                 if (pf->fd_tcp_rule == 0) {
294                         pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
295                         if (I40E_DEBUG_FD & pf->hw.debug_mask)
296                                 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297                 }
298         }
299
300         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
301         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
302
303         if (ret) {
304                 dev_info(&pf->pdev->dev,
305                          "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
306                          fd_data->pctype, fd_data->fd_id, ret);
307                 err = true;
308         } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
309                 if (add)
310                         dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
311                                  fd_data->pctype, fd_data->fd_id);
312                 else
313                         dev_info(&pf->pdev->dev,
314                                  "Filter deleted for PCTYPE %d loc = %d\n",
315                                  fd_data->pctype, fd_data->fd_id);
316         }
317
318         if (err)
319                 kfree(raw_packet);
320
321         return err ? -EOPNOTSUPP : 0;
322 }
323
324 /**
325  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
326  * a specific flow spec
327  * @vsi: pointer to the targeted VSI
328  * @fd_data: the flow director data required for the FDir descriptor
329  * @add: true adds a filter, false removes it
330  *
331  * Returns 0 if the filters were successfully added or removed
332  **/
333 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
334                                     struct i40e_fdir_filter *fd_data,
335                                     bool add)
336 {
337         return -EOPNOTSUPP;
338 }
339
340 #define I40E_IP_DUMMY_PACKET_LEN 34
341 /**
342  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
343  * a specific flow spec
344  * @vsi: pointer to the targeted VSI
345  * @fd_data: the flow director data required for the FDir descriptor
346  * @add: true adds a filter, false removes it
347  *
348  * Returns 0 if the filters were successfully added or removed
349  **/
350 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
351                                   struct i40e_fdir_filter *fd_data,
352                                   bool add)
353 {
354         struct i40e_pf *pf = vsi->back;
355         struct iphdr *ip;
356         bool err = false;
357         u8 *raw_packet;
358         int ret;
359         int i;
360         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
361                 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
362                 0, 0, 0, 0};
363
364         for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
365              i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
366                 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
367                 if (!raw_packet)
368                         return -ENOMEM;
369                 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
370                 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
371
372                 ip->saddr = fd_data->src_ip[0];
373                 ip->daddr = fd_data->dst_ip[0];
374                 ip->protocol = 0;
375
376                 fd_data->pctype = i;
377                 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
378
379                 if (ret) {
380                         dev_info(&pf->pdev->dev,
381                                  "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
382                                  fd_data->pctype, fd_data->fd_id, ret);
383                         err = true;
384                 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
385                         if (add)
386                                 dev_info(&pf->pdev->dev,
387                                          "Filter OK for PCTYPE %d loc = %d\n",
388                                          fd_data->pctype, fd_data->fd_id);
389                         else
390                                 dev_info(&pf->pdev->dev,
391                                          "Filter deleted for PCTYPE %d loc = %d\n",
392                                          fd_data->pctype, fd_data->fd_id);
393                 }
394         }
395
396         if (err)
397                 kfree(raw_packet);
398
399         return err ? -EOPNOTSUPP : 0;
400 }
401
402 /**
403  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
404  * @vsi: pointer to the targeted VSI
405  * @cmd: command to get or set RX flow classification rules
406  * @add: true adds a filter, false removes it
407  *
408  **/
409 int i40e_add_del_fdir(struct i40e_vsi *vsi,
410                       struct i40e_fdir_filter *input, bool add)
411 {
412         struct i40e_pf *pf = vsi->back;
413         int ret;
414
415         switch (input->flow_type & ~FLOW_EXT) {
416         case TCP_V4_FLOW:
417                 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
418                 break;
419         case UDP_V4_FLOW:
420                 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
421                 break;
422         case SCTP_V4_FLOW:
423                 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
424                 break;
425         case IPV4_FLOW:
426                 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
427                 break;
428         case IP_USER_FLOW:
429                 switch (input->ip4_proto) {
430                 case IPPROTO_TCP:
431                         ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
432                         break;
433                 case IPPROTO_UDP:
434                         ret = i40e_add_del_fdir_udpv4(vsi, input, add);
435                         break;
436                 case IPPROTO_SCTP:
437                         ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
438                         break;
439                 default:
440                         ret = i40e_add_del_fdir_ipv4(vsi, input, add);
441                         break;
442                 }
443                 break;
444         default:
445                 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
446                          input->flow_type);
447                 ret = -EINVAL;
448         }
449
450         /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
451         return ret;
452 }
453
454 /**
455  * i40e_fd_handle_status - check the Programming Status for FD
456  * @rx_ring: the Rx ring for this descriptor
457  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
458  * @prog_id: the id originally used for programming
459  *
460  * This is used to verify if the FD programming or invalidation
461  * requested by SW to the HW is successful or not and take actions accordingly.
462  **/
463 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
464                                   union i40e_rx_desc *rx_desc, u8 prog_id)
465 {
466         struct i40e_pf *pf = rx_ring->vsi->back;
467         struct pci_dev *pdev = pf->pdev;
468         u32 fcnt_prog, fcnt_avail;
469         u32 error;
470         u64 qw;
471
472         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
473         error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
474                 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
475
476         if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
477                 pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
478                 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
479                     (I40E_DEBUG_FD & pf->hw.debug_mask))
480                         dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
481                                  pf->fd_inv);
482
483                 /* Check if the programming error is for ATR.
484                  * If so, auto disable ATR and set a state for
485                  * flush in progress. Next time we come here if flush is in
486                  * progress do nothing, once flush is complete the state will
487                  * be cleared.
488                  */
489                 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
490                         return;
491
492                 pf->fd_add_err++;
493                 /* store the current atr filter count */
494                 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
495
496                 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
497                     (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
498                         pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
499                         set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
500                 }
501
502                 /* filter programming failed most likely due to table full */
503                 fcnt_prog = i40e_get_global_fd_count(pf);
504                 fcnt_avail = pf->fdir_pf_filter_count;
505                 /* If ATR is running fcnt_prog can quickly change,
506                  * if we are very close to full, it makes sense to disable
507                  * FD ATR/SB and then re-enable it when there is room.
508                  */
509                 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
510                         if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
511                             !(pf->auto_disable_flags &
512                                      I40E_FLAG_FD_SB_ENABLED)) {
513                                 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514                                         dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
515                                 pf->auto_disable_flags |=
516                                                         I40E_FLAG_FD_SB_ENABLED;
517                         }
518                 }
519         } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
520                 if (I40E_DEBUG_FD & pf->hw.debug_mask)
521                         dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
522                                  rx_desc->wb.qword0.hi_dword.fd_id);
523         }
524 }
525
526 /**
527  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
528  * @ring:      the ring that owns the buffer
529  * @tx_buffer: the buffer to free
530  **/
531 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
532                                             struct i40e_tx_buffer *tx_buffer)
533 {
534         if (tx_buffer->skb) {
535                 dev_kfree_skb_any(tx_buffer->skb);
536                 if (dma_unmap_len(tx_buffer, len))
537                         dma_unmap_single(ring->dev,
538                                          dma_unmap_addr(tx_buffer, dma),
539                                          dma_unmap_len(tx_buffer, len),
540                                          DMA_TO_DEVICE);
541         } else if (dma_unmap_len(tx_buffer, len)) {
542                 dma_unmap_page(ring->dev,
543                                dma_unmap_addr(tx_buffer, dma),
544                                dma_unmap_len(tx_buffer, len),
545                                DMA_TO_DEVICE);
546         }
547
548         if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
549                 kfree(tx_buffer->raw_buf);
550
551         tx_buffer->next_to_watch = NULL;
552         tx_buffer->skb = NULL;
553         dma_unmap_len_set(tx_buffer, len, 0);
554         /* tx_buffer must be completely set up in the transmit path */
555 }
556
557 /**
558  * i40e_clean_tx_ring - Free any empty Tx buffers
559  * @tx_ring: ring to be cleaned
560  **/
561 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
562 {
563         unsigned long bi_size;
564         u16 i;
565
566         /* ring already cleared, nothing to do */
567         if (!tx_ring->tx_bi)
568                 return;
569
570         /* Free all the Tx ring sk_buffs */
571         for (i = 0; i < tx_ring->count; i++)
572                 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
573
574         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
575         memset(tx_ring->tx_bi, 0, bi_size);
576
577         /* Zero out the descriptor ring */
578         memset(tx_ring->desc, 0, tx_ring->size);
579
580         tx_ring->next_to_use = 0;
581         tx_ring->next_to_clean = 0;
582
583         if (!tx_ring->netdev)
584                 return;
585
586         /* cleanup Tx queue statistics */
587         netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
588                                                   tx_ring->queue_index));
589 }
590
591 /**
592  * i40e_free_tx_resources - Free Tx resources per queue
593  * @tx_ring: Tx descriptor ring for a specific queue
594  *
595  * Free all transmit software resources
596  **/
597 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
598 {
599         i40e_clean_tx_ring(tx_ring);
600         kfree(tx_ring->tx_bi);
601         tx_ring->tx_bi = NULL;
602
603         if (tx_ring->desc) {
604                 dma_free_coherent(tx_ring->dev, tx_ring->size,
605                                   tx_ring->desc, tx_ring->dma);
606                 tx_ring->desc = NULL;
607         }
608 }
609
610 /**
611  * i40e_get_tx_pending - how many tx descriptors not processed
612  * @tx_ring: the ring of descriptors
613  * @in_sw: is tx_pending being checked in SW or HW
614  *
615  * Since there is no access to the ring head register
616  * in XL710, we need to use our local copies
617  **/
618 u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
619 {
620         u32 head, tail;
621
622         if (!in_sw)
623                 head = i40e_get_head(ring);
624         else
625                 head = ring->next_to_clean;
626         tail = readl(ring->tail);
627
628         if (head != tail)
629                 return (head < tail) ?
630                         tail - head : (tail + ring->count - head);
631
632         return 0;
633 }
634
635 #define WB_STRIDE 0x3
636
637 /**
638  * i40e_clean_tx_irq - Reclaim resources after transmit completes
639  * @tx_ring:  tx ring to clean
640  * @budget:   how many cleans we're allowed
641  *
642  * Returns true if there's any budget left (e.g. the clean is finished)
643  **/
644 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
645 {
646         u16 i = tx_ring->next_to_clean;
647         struct i40e_tx_buffer *tx_buf;
648         struct i40e_tx_desc *tx_head;
649         struct i40e_tx_desc *tx_desc;
650         unsigned int total_packets = 0;
651         unsigned int total_bytes = 0;
652
653         tx_buf = &tx_ring->tx_bi[i];
654         tx_desc = I40E_TX_DESC(tx_ring, i);
655         i -= tx_ring->count;
656
657         tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
658
659         do {
660                 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
661
662                 /* if next_to_watch is not set then there is no work pending */
663                 if (!eop_desc)
664                         break;
665
666                 /* prevent any other reads prior to eop_desc */
667                 read_barrier_depends();
668
669                 /* we have caught up to head, no work left to do */
670                 if (tx_head == tx_desc)
671                         break;
672
673                 /* clear next_to_watch to prevent false hangs */
674                 tx_buf->next_to_watch = NULL;
675
676                 /* update the statistics for this packet */
677                 total_bytes += tx_buf->bytecount;
678                 total_packets += tx_buf->gso_segs;
679
680                 /* free the skb */
681                 dev_consume_skb_any(tx_buf->skb);
682
683                 /* unmap skb header data */
684                 dma_unmap_single(tx_ring->dev,
685                                  dma_unmap_addr(tx_buf, dma),
686                                  dma_unmap_len(tx_buf, len),
687                                  DMA_TO_DEVICE);
688
689                 /* clear tx_buffer data */
690                 tx_buf->skb = NULL;
691                 dma_unmap_len_set(tx_buf, len, 0);
692
693                 /* unmap remaining buffers */
694                 while (tx_desc != eop_desc) {
695
696                         tx_buf++;
697                         tx_desc++;
698                         i++;
699                         if (unlikely(!i)) {
700                                 i -= tx_ring->count;
701                                 tx_buf = tx_ring->tx_bi;
702                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
703                         }
704
705                         /* unmap any remaining paged data */
706                         if (dma_unmap_len(tx_buf, len)) {
707                                 dma_unmap_page(tx_ring->dev,
708                                                dma_unmap_addr(tx_buf, dma),
709                                                dma_unmap_len(tx_buf, len),
710                                                DMA_TO_DEVICE);
711                                 dma_unmap_len_set(tx_buf, len, 0);
712                         }
713                 }
714
715                 /* move us one more past the eop_desc for start of next pkt */
716                 tx_buf++;
717                 tx_desc++;
718                 i++;
719                 if (unlikely(!i)) {
720                         i -= tx_ring->count;
721                         tx_buf = tx_ring->tx_bi;
722                         tx_desc = I40E_TX_DESC(tx_ring, 0);
723                 }
724
725                 prefetch(tx_desc);
726
727                 /* update budget accounting */
728                 budget--;
729         } while (likely(budget));
730
731         i += tx_ring->count;
732         tx_ring->next_to_clean = i;
733         u64_stats_update_begin(&tx_ring->syncp);
734         tx_ring->stats.bytes += total_bytes;
735         tx_ring->stats.packets += total_packets;
736         u64_stats_update_end(&tx_ring->syncp);
737         tx_ring->q_vector->tx.total_bytes += total_bytes;
738         tx_ring->q_vector->tx.total_packets += total_packets;
739
740         if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
741                 unsigned int j = 0;
742
743                 /* check to see if there are < 4 descriptors
744                  * waiting to be written back, then kick the hardware to force
745                  * them to be written back in case we stay in NAPI.
746                  * In this mode on X722 we do not enable Interrupt.
747                  */
748                 j = i40e_get_tx_pending(tx_ring, false);
749
750                 if (budget &&
751                     ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
752                     !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
753                     (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
754                         tx_ring->arm_wb = true;
755         }
756
757         netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
758                                                       tx_ring->queue_index),
759                                   total_packets, total_bytes);
760
761 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
762         if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
763                      (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
764                 /* Make sure that anybody stopping the queue after this
765                  * sees the new next_to_clean.
766                  */
767                 smp_mb();
768                 if (__netif_subqueue_stopped(tx_ring->netdev,
769                                              tx_ring->queue_index) &&
770                    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
771                         netif_wake_subqueue(tx_ring->netdev,
772                                             tx_ring->queue_index);
773                         ++tx_ring->tx_stats.restart_queue;
774                 }
775         }
776
777         return !!budget;
778 }
779
780 /**
781  * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
782  * @vsi: the VSI we care about
783  * @q_vector: the vector on which to enable writeback
784  *
785  **/
786 static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
787                                   struct i40e_q_vector *q_vector)
788 {
789         u16 flags = q_vector->tx.ring[0].flags;
790         u32 val;
791
792         if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
793                 return;
794
795         if (q_vector->arm_wb_state)
796                 return;
797
798         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
799                 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
800                       I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
801
802                 wr32(&vsi->back->hw,
803                      I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
804                      val);
805         } else {
806                 val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
807                       I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
808
809                 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
810         }
811         q_vector->arm_wb_state = true;
812 }
813
814 /**
815  * i40e_force_wb - Issue SW Interrupt so HW does a wb
816  * @vsi: the VSI we care about
817  * @q_vector: the vector  on which to force writeback
818  *
819  **/
820 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
821 {
822         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
823                 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
824                           I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
825                           I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
826                           I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
827                           /* allow 00 to be written to the index */
828
829                 wr32(&vsi->back->hw,
830                      I40E_PFINT_DYN_CTLN(q_vector->v_idx +
831                                          vsi->base_vector - 1), val);
832         } else {
833                 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
834                           I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
835                           I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
836                           I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
837                         /* allow 00 to be written to the index */
838
839                 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
840         }
841 }
842
843 /**
844  * i40e_set_new_dynamic_itr - Find new ITR level
845  * @rc: structure containing ring performance data
846  *
847  * Returns true if ITR changed, false if not
848  *
849  * Stores a new ITR value based on packets and byte counts during
850  * the last interrupt.  The advantage of per interrupt computation
851  * is faster updates and more accurate ITR for the current traffic
852  * pattern.  Constants in this function were computed based on
853  * theoretical maximum wire speed and thresholds were set based on
854  * testing data as well as attempting to minimize response time
855  * while increasing bulk throughput.
856  **/
857 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
858 {
859         enum i40e_latency_range new_latency_range = rc->latency_range;
860         struct i40e_q_vector *qv = rc->ring->q_vector;
861         u32 new_itr = rc->itr;
862         int bytes_per_int;
863         int usecs;
864
865         if (rc->total_packets == 0 || !rc->itr)
866                 return false;
867
868         /* simple throttlerate management
869          *   0-10MB/s   lowest (50000 ints/s)
870          *  10-20MB/s   low    (20000 ints/s)
871          *  20-1249MB/s bulk   (18000 ints/s)
872          *  > 40000 Rx packets per second (8000 ints/s)
873          *
874          * The math works out because the divisor is in 10^(-6) which
875          * turns the bytes/us input value into MB/s values, but
876          * make sure to use usecs, as the register values written
877          * are in 2 usec increments in the ITR registers, and make sure
878          * to use the smoothed values that the countdown timer gives us.
879          */
880         usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
881         bytes_per_int = rc->total_bytes / usecs;
882
883         switch (new_latency_range) {
884         case I40E_LOWEST_LATENCY:
885                 if (bytes_per_int > 10)
886                         new_latency_range = I40E_LOW_LATENCY;
887                 break;
888         case I40E_LOW_LATENCY:
889                 if (bytes_per_int > 20)
890                         new_latency_range = I40E_BULK_LATENCY;
891                 else if (bytes_per_int <= 10)
892                         new_latency_range = I40E_LOWEST_LATENCY;
893                 break;
894         case I40E_BULK_LATENCY:
895         case I40E_ULTRA_LATENCY:
896         default:
897                 if (bytes_per_int <= 20)
898                         new_latency_range = I40E_LOW_LATENCY;
899                 break;
900         }
901
902         /* this is to adjust RX more aggressively when streaming small
903          * packets.  The value of 40000 was picked as it is just beyond
904          * what the hardware can receive per second if in low latency
905          * mode.
906          */
907 #define RX_ULTRA_PACKET_RATE 40000
908
909         if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
910             (&qv->rx == rc))
911                 new_latency_range = I40E_ULTRA_LATENCY;
912
913         rc->latency_range = new_latency_range;
914
915         switch (new_latency_range) {
916         case I40E_LOWEST_LATENCY:
917                 new_itr = I40E_ITR_50K;
918                 break;
919         case I40E_LOW_LATENCY:
920                 new_itr = I40E_ITR_20K;
921                 break;
922         case I40E_BULK_LATENCY:
923                 new_itr = I40E_ITR_18K;
924                 break;
925         case I40E_ULTRA_LATENCY:
926                 new_itr = I40E_ITR_8K;
927                 break;
928         default:
929                 break;
930         }
931
932         rc->total_bytes = 0;
933         rc->total_packets = 0;
934
935         if (new_itr != rc->itr) {
936                 rc->itr = new_itr;
937                 return true;
938         }
939
940         return false;
941 }
942
943 /**
944  * i40e_clean_programming_status - clean the programming status descriptor
945  * @rx_ring: the rx ring that has this descriptor
946  * @rx_desc: the rx descriptor written back by HW
947  *
948  * Flow director should handle FD_FILTER_STATUS to check its filter programming
949  * status being successful or not and take actions accordingly. FCoE should
950  * handle its context/filter programming/invalidation status and take actions.
951  *
952  **/
953 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
954                                           union i40e_rx_desc *rx_desc)
955 {
956         u64 qw;
957         u8 id;
958
959         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
960         id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
961                   I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
962
963         if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
964                 i40e_fd_handle_status(rx_ring, rx_desc, id);
965 #ifdef I40E_FCOE
966         else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
967                  (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
968                 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
969 #endif
970 }
971
972 /**
973  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
974  * @tx_ring: the tx ring to set up
975  *
976  * Return 0 on success, negative on error
977  **/
978 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
979 {
980         struct device *dev = tx_ring->dev;
981         int bi_size;
982
983         if (!dev)
984                 return -ENOMEM;
985
986         /* warn if we are about to overwrite the pointer */
987         WARN_ON(tx_ring->tx_bi);
988         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
989         tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
990         if (!tx_ring->tx_bi)
991                 goto err;
992
993         /* round up to nearest 4K */
994         tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
995         /* add u32 for head writeback, align after this takes care of
996          * guaranteeing this is at least one cache line in size
997          */
998         tx_ring->size += sizeof(u32);
999         tx_ring->size = ALIGN(tx_ring->size, 4096);
1000         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1001                                            &tx_ring->dma, GFP_KERNEL);
1002         if (!tx_ring->desc) {
1003                 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1004                          tx_ring->size);
1005                 goto err;
1006         }
1007
1008         tx_ring->next_to_use = 0;
1009         tx_ring->next_to_clean = 0;
1010         return 0;
1011
1012 err:
1013         kfree(tx_ring->tx_bi);
1014         tx_ring->tx_bi = NULL;
1015         return -ENOMEM;
1016 }
1017
1018 /**
1019  * i40e_clean_rx_ring - Free Rx buffers
1020  * @rx_ring: ring to be cleaned
1021  **/
1022 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1023 {
1024         struct device *dev = rx_ring->dev;
1025         struct i40e_rx_buffer *rx_bi;
1026         unsigned long bi_size;
1027         u16 i;
1028
1029         /* ring already cleared, nothing to do */
1030         if (!rx_ring->rx_bi)
1031                 return;
1032
1033         if (ring_is_ps_enabled(rx_ring)) {
1034                 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1035
1036                 rx_bi = &rx_ring->rx_bi[0];
1037                 if (rx_bi->hdr_buf) {
1038                         dma_free_coherent(dev,
1039                                           bufsz,
1040                                           rx_bi->hdr_buf,
1041                                           rx_bi->dma);
1042                         for (i = 0; i < rx_ring->count; i++) {
1043                                 rx_bi = &rx_ring->rx_bi[i];
1044                                 rx_bi->dma = 0;
1045                                 rx_bi->hdr_buf = NULL;
1046                         }
1047                 }
1048         }
1049         /* Free all the Rx ring sk_buffs */
1050         for (i = 0; i < rx_ring->count; i++) {
1051                 rx_bi = &rx_ring->rx_bi[i];
1052                 if (rx_bi->dma) {
1053                         dma_unmap_single(dev,
1054                                          rx_bi->dma,
1055                                          rx_ring->rx_buf_len,
1056                                          DMA_FROM_DEVICE);
1057                         rx_bi->dma = 0;
1058                 }
1059                 if (rx_bi->skb) {
1060                         dev_kfree_skb(rx_bi->skb);
1061                         rx_bi->skb = NULL;
1062                 }
1063                 if (rx_bi->page) {
1064                         if (rx_bi->page_dma) {
1065                                 dma_unmap_page(dev,
1066                                                rx_bi->page_dma,
1067                                                PAGE_SIZE,
1068                                                DMA_FROM_DEVICE);
1069                                 rx_bi->page_dma = 0;
1070                         }
1071                         __free_page(rx_bi->page);
1072                         rx_bi->page = NULL;
1073                         rx_bi->page_offset = 0;
1074                 }
1075         }
1076
1077         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1078         memset(rx_ring->rx_bi, 0, bi_size);
1079
1080         /* Zero out the descriptor ring */
1081         memset(rx_ring->desc, 0, rx_ring->size);
1082
1083         rx_ring->next_to_clean = 0;
1084         rx_ring->next_to_use = 0;
1085 }
1086
1087 /**
1088  * i40e_free_rx_resources - Free Rx resources
1089  * @rx_ring: ring to clean the resources from
1090  *
1091  * Free all receive software resources
1092  **/
1093 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1094 {
1095         i40e_clean_rx_ring(rx_ring);
1096         kfree(rx_ring->rx_bi);
1097         rx_ring->rx_bi = NULL;
1098
1099         if (rx_ring->desc) {
1100                 dma_free_coherent(rx_ring->dev, rx_ring->size,
1101                                   rx_ring->desc, rx_ring->dma);
1102                 rx_ring->desc = NULL;
1103         }
1104 }
1105
1106 /**
1107  * i40e_alloc_rx_headers - allocate rx header buffers
1108  * @rx_ring: ring to alloc buffers
1109  *
1110  * Allocate rx header buffers for the entire ring. As these are static,
1111  * this is only called when setting up a new ring.
1112  **/
1113 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1114 {
1115         struct device *dev = rx_ring->dev;
1116         struct i40e_rx_buffer *rx_bi;
1117         dma_addr_t dma;
1118         void *buffer;
1119         int buf_size;
1120         int i;
1121
1122         if (rx_ring->rx_bi[0].hdr_buf)
1123                 return;
1124         /* Make sure the buffers don't cross cache line boundaries. */
1125         buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1126         buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1127                                     &dma, GFP_KERNEL);
1128         if (!buffer)
1129                 return;
1130         for (i = 0; i < rx_ring->count; i++) {
1131                 rx_bi = &rx_ring->rx_bi[i];
1132                 rx_bi->dma = dma + (i * buf_size);
1133                 rx_bi->hdr_buf = buffer + (i * buf_size);
1134         }
1135 }
1136
1137 /**
1138  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1139  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1140  *
1141  * Returns 0 on success, negative on failure
1142  **/
1143 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1144 {
1145         struct device *dev = rx_ring->dev;
1146         int bi_size;
1147
1148         /* warn if we are about to overwrite the pointer */
1149         WARN_ON(rx_ring->rx_bi);
1150         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1151         rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1152         if (!rx_ring->rx_bi)
1153                 goto err;
1154
1155         u64_stats_init(&rx_ring->syncp);
1156
1157         /* Round up to nearest 4K */
1158         rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1159                 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1160                 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1161         rx_ring->size = ALIGN(rx_ring->size, 4096);
1162         rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1163                                            &rx_ring->dma, GFP_KERNEL);
1164
1165         if (!rx_ring->desc) {
1166                 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1167                          rx_ring->size);
1168                 goto err;
1169         }
1170
1171         rx_ring->next_to_clean = 0;
1172         rx_ring->next_to_use = 0;
1173
1174         return 0;
1175 err:
1176         kfree(rx_ring->rx_bi);
1177         rx_ring->rx_bi = NULL;
1178         return -ENOMEM;
1179 }
1180
1181 /**
1182  * i40e_release_rx_desc - Store the new tail and head values
1183  * @rx_ring: ring to bump
1184  * @val: new head index
1185  **/
1186 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1187 {
1188         rx_ring->next_to_use = val;
1189         /* Force memory writes to complete before letting h/w
1190          * know there are new descriptors to fetch.  (Only
1191          * applicable for weak-ordered memory model archs,
1192          * such as IA-64).
1193          */
1194         wmb();
1195         writel(val, rx_ring->tail);
1196 }
1197
1198 /**
1199  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1200  * @rx_ring: ring to place buffers on
1201  * @cleaned_count: number of buffers to replace
1202  *
1203  * Returns true if any errors on allocation
1204  **/
1205 bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1206 {
1207         u16 i = rx_ring->next_to_use;
1208         union i40e_rx_desc *rx_desc;
1209         struct i40e_rx_buffer *bi;
1210         const int current_node = numa_node_id();
1211
1212         /* do nothing if no valid netdev defined */
1213         if (!rx_ring->netdev || !cleaned_count)
1214                 return false;
1215
1216         while (cleaned_count--) {
1217                 rx_desc = I40E_RX_DESC(rx_ring, i);
1218                 bi = &rx_ring->rx_bi[i];
1219
1220                 if (bi->skb) /* desc is in use */
1221                         goto no_buffers;
1222
1223         /* If we've been moved to a different NUMA node, release the
1224          * page so we can get a new one on the current node.
1225          */
1226                 if (bi->page &&  page_to_nid(bi->page) != current_node) {
1227                         dma_unmap_page(rx_ring->dev,
1228                                        bi->page_dma,
1229                                        PAGE_SIZE,
1230                                        DMA_FROM_DEVICE);
1231                         __free_page(bi->page);
1232                         bi->page = NULL;
1233                         bi->page_dma = 0;
1234                         rx_ring->rx_stats.realloc_count++;
1235                 } else if (bi->page) {
1236                         rx_ring->rx_stats.page_reuse_count++;
1237                 }
1238
1239                 if (!bi->page) {
1240                         bi->page = alloc_page(GFP_ATOMIC);
1241                         if (!bi->page) {
1242                                 rx_ring->rx_stats.alloc_page_failed++;
1243                                 goto no_buffers;
1244                         }
1245                         bi->page_dma = dma_map_page(rx_ring->dev,
1246                                                     bi->page,
1247                                                     0,
1248                                                     PAGE_SIZE,
1249                                                     DMA_FROM_DEVICE);
1250                         if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
1251                                 rx_ring->rx_stats.alloc_page_failed++;
1252                                 __free_page(bi->page);
1253                                 bi->page = NULL;
1254                                 bi->page_dma = 0;
1255                                 bi->page_offset = 0;
1256                                 goto no_buffers;
1257                         }
1258                         bi->page_offset = 0;
1259                 }
1260
1261                 /* Refresh the desc even if buffer_addrs didn't change
1262                  * because each write-back erases this info.
1263                  */
1264                 rx_desc->read.pkt_addr =
1265                                 cpu_to_le64(bi->page_dma + bi->page_offset);
1266                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1267                 i++;
1268                 if (i == rx_ring->count)
1269                         i = 0;
1270         }
1271
1272         if (rx_ring->next_to_use != i)
1273                 i40e_release_rx_desc(rx_ring, i);
1274
1275         return false;
1276
1277 no_buffers:
1278         if (rx_ring->next_to_use != i)
1279                 i40e_release_rx_desc(rx_ring, i);
1280
1281         /* make sure to come back via polling to try again after
1282          * allocation failure
1283          */
1284         return true;
1285 }
1286
1287 /**
1288  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1289  * @rx_ring: ring to place buffers on
1290  * @cleaned_count: number of buffers to replace
1291  *
1292  * Returns true if any errors on allocation
1293  **/
1294 bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1295 {
1296         u16 i = rx_ring->next_to_use;
1297         union i40e_rx_desc *rx_desc;
1298         struct i40e_rx_buffer *bi;
1299         struct sk_buff *skb;
1300
1301         /* do nothing if no valid netdev defined */
1302         if (!rx_ring->netdev || !cleaned_count)
1303                 return false;
1304
1305         while (cleaned_count--) {
1306                 rx_desc = I40E_RX_DESC(rx_ring, i);
1307                 bi = &rx_ring->rx_bi[i];
1308                 skb = bi->skb;
1309
1310                 if (!skb) {
1311                         skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1312                                                           rx_ring->rx_buf_len,
1313                                                           GFP_ATOMIC |
1314                                                           __GFP_NOWARN);
1315                         if (!skb) {
1316                                 rx_ring->rx_stats.alloc_buff_failed++;
1317                                 goto no_buffers;
1318                         }
1319                         /* initialize queue mapping */
1320                         skb_record_rx_queue(skb, rx_ring->queue_index);
1321                         bi->skb = skb;
1322                 }
1323
1324                 if (!bi->dma) {
1325                         bi->dma = dma_map_single(rx_ring->dev,
1326                                                  skb->data,
1327                                                  rx_ring->rx_buf_len,
1328                                                  DMA_FROM_DEVICE);
1329                         if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1330                                 rx_ring->rx_stats.alloc_buff_failed++;
1331                                 bi->dma = 0;
1332                                 dev_kfree_skb(bi->skb);
1333                                 bi->skb = NULL;
1334                                 goto no_buffers;
1335                         }
1336                 }
1337
1338                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1339                 rx_desc->read.hdr_addr = 0;
1340                 i++;
1341                 if (i == rx_ring->count)
1342                         i = 0;
1343         }
1344
1345         if (rx_ring->next_to_use != i)
1346                 i40e_release_rx_desc(rx_ring, i);
1347
1348         return false;
1349
1350 no_buffers:
1351         if (rx_ring->next_to_use != i)
1352                 i40e_release_rx_desc(rx_ring, i);
1353
1354         /* make sure to come back via polling to try again after
1355          * allocation failure
1356          */
1357         return true;
1358 }
1359
1360 /**
1361  * i40e_receive_skb - Send a completed packet up the stack
1362  * @rx_ring:  rx ring in play
1363  * @skb: packet to send up
1364  * @vlan_tag: vlan tag for packet
1365  **/
1366 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1367                              struct sk_buff *skb, u16 vlan_tag)
1368 {
1369         struct i40e_q_vector *q_vector = rx_ring->q_vector;
1370
1371         if (vlan_tag & VLAN_VID_MASK)
1372                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1373
1374         napi_gro_receive(&q_vector->napi, skb);
1375 }
1376
1377 /**
1378  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1379  * @vsi: the VSI we care about
1380  * @skb: skb currently being received and modified
1381  * @rx_status: status value of last descriptor in packet
1382  * @rx_error: error value of last descriptor in packet
1383  * @rx_ptype: ptype value of last descriptor in packet
1384  **/
1385 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1386                                     struct sk_buff *skb,
1387                                     u32 rx_status,
1388                                     u32 rx_error,
1389                                     u16 rx_ptype)
1390 {
1391         struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1392         bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
1393
1394         skb->ip_summed = CHECKSUM_NONE;
1395
1396         /* Rx csum enabled and ip headers found? */
1397         if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1398                 return;
1399
1400         /* did the hardware decode the packet and checksum? */
1401         if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1402                 return;
1403
1404         /* both known and outer_ip must be set for the below code to work */
1405         if (!(decoded.known && decoded.outer_ip))
1406                 return;
1407
1408         ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1409                (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
1410         ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1411                (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
1412
1413         if (ipv4 &&
1414             (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1415                          BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1416                 goto checksum_fail;
1417
1418         /* likely incorrect csum if alternate IP extension headers found */
1419         if (ipv6 &&
1420             rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1421                 /* don't increment checksum err here, non-fatal err */
1422                 return;
1423
1424         /* there was some L4 error, count error and punt packet to the stack */
1425         if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1426                 goto checksum_fail;
1427
1428         /* handle packets that were not able to be checksummed due
1429          * to arrival speed, in this case the stack can compute
1430          * the csum.
1431          */
1432         if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1433                 return;
1434
1435         /* The hardware supported by this driver does not validate outer
1436          * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
1437          * with it but the specification states that you "MAY validate", it
1438          * doesn't make it a hard requirement so if we have validated the
1439          * inner checksum report CHECKSUM_UNNECESSARY.
1440          */
1441
1442         ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1443                      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1444         ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1445                      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1446
1447         skb->ip_summed = CHECKSUM_UNNECESSARY;
1448         skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1449
1450         return;
1451
1452 checksum_fail:
1453         vsi->back->hw_csum_rx_error++;
1454 }
1455
1456 /**
1457  * i40e_ptype_to_htype - get a hash type
1458  * @ptype: the ptype value from the descriptor
1459  *
1460  * Returns a hash type to be used by skb_set_hash
1461  **/
1462 static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
1463 {
1464         struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1465
1466         if (!decoded.known)
1467                 return PKT_HASH_TYPE_NONE;
1468
1469         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1470             decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1471                 return PKT_HASH_TYPE_L4;
1472         else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1473                  decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1474                 return PKT_HASH_TYPE_L3;
1475         else
1476                 return PKT_HASH_TYPE_L2;
1477 }
1478
1479 /**
1480  * i40e_rx_hash - set the hash value in the skb
1481  * @ring: descriptor ring
1482  * @rx_desc: specific descriptor
1483  **/
1484 static inline void i40e_rx_hash(struct i40e_ring *ring,
1485                                 union i40e_rx_desc *rx_desc,
1486                                 struct sk_buff *skb,
1487                                 u8 rx_ptype)
1488 {
1489         u32 hash;
1490         const __le64 rss_mask  =
1491                 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1492                             I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1493
1494         if (ring->netdev->features & NETIF_F_RXHASH)
1495                 return;
1496
1497         if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1498                 hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1499                 skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1500         }
1501 }
1502
1503 /**
1504  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1505  * @rx_ring:  rx ring to clean
1506  * @budget:   how many cleans we're allowed
1507  *
1508  * Returns true if there's any budget left (e.g. the clean is finished)
1509  **/
1510 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
1511 {
1512         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1513         u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1514         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1515         struct i40e_vsi *vsi = rx_ring->vsi;
1516         u16 i = rx_ring->next_to_clean;
1517         union i40e_rx_desc *rx_desc;
1518         u32 rx_error, rx_status;
1519         bool failure = false;
1520         u8 rx_ptype;
1521         u64 qword;
1522         u32 copysize;
1523
1524         if (budget <= 0)
1525                 return 0;
1526
1527         do {
1528                 struct i40e_rx_buffer *rx_bi;
1529                 struct sk_buff *skb;
1530                 u16 vlan_tag;
1531                 /* return some buffers to hardware, one at a time is too slow */
1532                 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1533                         failure = failure ||
1534                                   i40e_alloc_rx_buffers_ps(rx_ring,
1535                                                            cleaned_count);
1536                         cleaned_count = 0;
1537                 }
1538
1539                 i = rx_ring->next_to_clean;
1540                 rx_desc = I40E_RX_DESC(rx_ring, i);
1541                 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1542                 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1543                         I40E_RXD_QW1_STATUS_SHIFT;
1544
1545                 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1546                         break;
1547
1548                 /* This memory barrier is needed to keep us from reading
1549                  * any other fields out of the rx_desc until we know the
1550                  * DD bit is set.
1551                  */
1552                 dma_rmb();
1553                 /* sync header buffer for reading */
1554                 dma_sync_single_range_for_cpu(rx_ring->dev,
1555                                               rx_ring->rx_bi[0].dma,
1556                                               i * rx_ring->rx_hdr_len,
1557                                               rx_ring->rx_hdr_len,
1558                                               DMA_FROM_DEVICE);
1559                 if (i40e_rx_is_programming_status(qword)) {
1560                         i40e_clean_programming_status(rx_ring, rx_desc);
1561                         I40E_RX_INCREMENT(rx_ring, i);
1562                         continue;
1563                 }
1564                 rx_bi = &rx_ring->rx_bi[i];
1565                 skb = rx_bi->skb;
1566                 if (likely(!skb)) {
1567                         skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1568                                                           rx_ring->rx_hdr_len,
1569                                                           GFP_ATOMIC |
1570                                                           __GFP_NOWARN);
1571                         if (!skb) {
1572                                 rx_ring->rx_stats.alloc_buff_failed++;
1573                                 failure = true;
1574                                 break;
1575                         }
1576
1577                         /* initialize queue mapping */
1578                         skb_record_rx_queue(skb, rx_ring->queue_index);
1579                         /* we are reusing so sync this buffer for CPU use */
1580                         dma_sync_single_range_for_cpu(rx_ring->dev,
1581                                                       rx_ring->rx_bi[0].dma,
1582                                                       i * rx_ring->rx_hdr_len,
1583                                                       rx_ring->rx_hdr_len,
1584                                                       DMA_FROM_DEVICE);
1585                 }
1586                 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1587                                 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1588                 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1589                                 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1590                 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1591                          I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1592
1593                 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1594                            I40E_RXD_QW1_ERROR_SHIFT;
1595                 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1596                 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1597
1598                 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1599                            I40E_RXD_QW1_PTYPE_SHIFT;
1600                 /* sync half-page for reading */
1601                 dma_sync_single_range_for_cpu(rx_ring->dev,
1602                                               rx_bi->page_dma,
1603                                               rx_bi->page_offset,
1604                                               PAGE_SIZE / 2,
1605                                               DMA_FROM_DEVICE);
1606                 prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
1607                 rx_bi->skb = NULL;
1608                 cleaned_count++;
1609                 copysize = 0;
1610                 if (rx_hbo || rx_sph) {
1611                         int len;
1612
1613                         if (rx_hbo)
1614                                 len = I40E_RX_HDR_SIZE;
1615                         else
1616                                 len = rx_header_len;
1617                         memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1618                 } else if (skb->len == 0) {
1619                         int len;
1620                         unsigned char *va = page_address(rx_bi->page) +
1621                                             rx_bi->page_offset;
1622
1623                         len = min(rx_packet_len, rx_ring->rx_hdr_len);
1624                         memcpy(__skb_put(skb, len), va, len);
1625                         copysize = len;
1626                         rx_packet_len -= len;
1627                 }
1628                 /* Get the rest of the data if this was a header split */
1629                 if (rx_packet_len) {
1630                         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
1631                                         rx_bi->page,
1632                                         rx_bi->page_offset + copysize,
1633                                         rx_packet_len, I40E_RXBUFFER_2048);
1634
1635                         /* If the page count is more than 2, then both halves
1636                          * of the page are used and we need to free it. Do it
1637                          * here instead of in the alloc code. Otherwise one
1638                          * of the half-pages might be released between now and
1639                          * then, and we wouldn't know which one to use.
1640                          * Don't call get_page and free_page since those are
1641                          * both expensive atomic operations that just change
1642                          * the refcount in opposite directions. Just give the
1643                          * page to the stack; he can have our refcount.
1644                          */
1645                         if (page_count(rx_bi->page) > 2) {
1646                                 dma_unmap_page(rx_ring->dev,
1647                                                rx_bi->page_dma,
1648                                                PAGE_SIZE,
1649                                                DMA_FROM_DEVICE);
1650                                 rx_bi->page = NULL;
1651                                 rx_bi->page_dma = 0;
1652                                 rx_ring->rx_stats.realloc_count++;
1653                         } else {
1654                                 get_page(rx_bi->page);
1655                                 /* switch to the other half-page here; the
1656                                  * allocation code programs the right addr
1657                                  * into HW. If we haven't used this half-page,
1658                                  * the address won't be changed, and HW can
1659                                  * just use it next time through.
1660                                  */
1661                                 rx_bi->page_offset ^= PAGE_SIZE / 2;
1662                         }
1663
1664                 }
1665                 I40E_RX_INCREMENT(rx_ring, i);
1666
1667                 if (unlikely(
1668                     !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1669                         struct i40e_rx_buffer *next_buffer;
1670
1671                         next_buffer = &rx_ring->rx_bi[i];
1672                         next_buffer->skb = skb;
1673                         rx_ring->rx_stats.non_eop_descs++;
1674                         continue;
1675                 }
1676
1677                 /* ERR_MASK will only have valid bits if EOP set */
1678                 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1679                         dev_kfree_skb_any(skb);
1680                         continue;
1681                 }
1682
1683                 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1684
1685                 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1686                         i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1687                                            I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1688                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1689                         rx_ring->last_rx_timestamp = jiffies;
1690                 }
1691
1692                 /* probably a little skewed due to removing CRC */
1693                 total_rx_bytes += skb->len;
1694                 total_rx_packets++;
1695
1696                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1697
1698                 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1699
1700                 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1701                          ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1702                          : 0;
1703 #ifdef I40E_FCOE
1704                 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1705                         dev_kfree_skb_any(skb);
1706                         continue;
1707                 }
1708 #endif
1709                 i40e_receive_skb(rx_ring, skb, vlan_tag);
1710
1711                 rx_desc->wb.qword1.status_error_len = 0;
1712
1713         } while (likely(total_rx_packets < budget));
1714
1715         u64_stats_update_begin(&rx_ring->syncp);
1716         rx_ring->stats.packets += total_rx_packets;
1717         rx_ring->stats.bytes += total_rx_bytes;
1718         u64_stats_update_end(&rx_ring->syncp);
1719         rx_ring->q_vector->rx.total_packets += total_rx_packets;
1720         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1721
1722         return failure ? budget : total_rx_packets;
1723 }
1724
1725 /**
1726  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1727  * @rx_ring:  rx ring to clean
1728  * @budget:   how many cleans we're allowed
1729  *
1730  * Returns number of packets cleaned
1731  **/
1732 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1733 {
1734         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1735         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1736         struct i40e_vsi *vsi = rx_ring->vsi;
1737         union i40e_rx_desc *rx_desc;
1738         u32 rx_error, rx_status;
1739         u16 rx_packet_len;
1740         bool failure = false;
1741         u8 rx_ptype;
1742         u64 qword;
1743         u16 i;
1744
1745         do {
1746                 struct i40e_rx_buffer *rx_bi;
1747                 struct sk_buff *skb;
1748                 u16 vlan_tag;
1749                 /* return some buffers to hardware, one at a time is too slow */
1750                 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1751                         failure = failure ||
1752                                   i40e_alloc_rx_buffers_1buf(rx_ring,
1753                                                              cleaned_count);
1754                         cleaned_count = 0;
1755                 }
1756
1757                 i = rx_ring->next_to_clean;
1758                 rx_desc = I40E_RX_DESC(rx_ring, i);
1759                 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1760                 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1761                         I40E_RXD_QW1_STATUS_SHIFT;
1762
1763                 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1764                         break;
1765
1766                 /* This memory barrier is needed to keep us from reading
1767                  * any other fields out of the rx_desc until we know the
1768                  * DD bit is set.
1769                  */
1770                 dma_rmb();
1771
1772                 if (i40e_rx_is_programming_status(qword)) {
1773                         i40e_clean_programming_status(rx_ring, rx_desc);
1774                         I40E_RX_INCREMENT(rx_ring, i);
1775                         continue;
1776                 }
1777                 rx_bi = &rx_ring->rx_bi[i];
1778                 skb = rx_bi->skb;
1779                 prefetch(skb->data);
1780
1781                 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1782                                 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1783
1784                 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1785                            I40E_RXD_QW1_ERROR_SHIFT;
1786                 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1787
1788                 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1789                            I40E_RXD_QW1_PTYPE_SHIFT;
1790                 rx_bi->skb = NULL;
1791                 cleaned_count++;
1792
1793                 /* Get the header and possibly the whole packet
1794                  * If this is an skb from previous receive dma will be 0
1795                  */
1796                 skb_put(skb, rx_packet_len);
1797                 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1798                                  DMA_FROM_DEVICE);
1799                 rx_bi->dma = 0;
1800
1801                 I40E_RX_INCREMENT(rx_ring, i);
1802
1803                 if (unlikely(
1804                     !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1805                         rx_ring->rx_stats.non_eop_descs++;
1806                         continue;
1807                 }
1808
1809                 /* ERR_MASK will only have valid bits if EOP set */
1810                 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1811                         dev_kfree_skb_any(skb);
1812                         continue;
1813                 }
1814
1815                 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1816                 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1817                         i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1818                                            I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1819                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1820                         rx_ring->last_rx_timestamp = jiffies;
1821                 }
1822
1823                 /* probably a little skewed due to removing CRC */
1824                 total_rx_bytes += skb->len;
1825                 total_rx_packets++;
1826
1827                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1828
1829                 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1830
1831                 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1832                          ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1833                          : 0;
1834 #ifdef I40E_FCOE
1835                 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1836                         dev_kfree_skb_any(skb);
1837                         continue;
1838                 }
1839 #endif
1840                 i40e_receive_skb(rx_ring, skb, vlan_tag);
1841
1842                 rx_desc->wb.qword1.status_error_len = 0;
1843         } while (likely(total_rx_packets < budget));
1844
1845         u64_stats_update_begin(&rx_ring->syncp);
1846         rx_ring->stats.packets += total_rx_packets;
1847         rx_ring->stats.bytes += total_rx_bytes;
1848         u64_stats_update_end(&rx_ring->syncp);
1849         rx_ring->q_vector->rx.total_packets += total_rx_packets;
1850         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1851
1852         return failure ? budget : total_rx_packets;
1853 }
1854
1855 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1856 {
1857         u32 val;
1858
1859         val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1860               /* Don't clear PBA because that can cause lost interrupts that
1861                * came in while we were cleaning/polling
1862                */
1863               (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1864               (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1865
1866         return val;
1867 }
1868
1869 /* a small macro to shorten up some long lines */
1870 #define INTREG I40E_PFINT_DYN_CTLN
1871
1872 /**
1873  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1874  * @vsi: the VSI we care about
1875  * @q_vector: q_vector for which itr is being updated and interrupt enabled
1876  *
1877  **/
1878 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1879                                           struct i40e_q_vector *q_vector)
1880 {
1881         struct i40e_hw *hw = &vsi->back->hw;
1882         bool rx = false, tx = false;
1883         u32 rxval, txval;
1884         int vector;
1885
1886         vector = (q_vector->v_idx + vsi->base_vector);
1887
1888         /* avoid dynamic calculation if in countdown mode OR if
1889          * all dynamic is disabled
1890          */
1891         rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1892
1893         if (q_vector->itr_countdown > 0 ||
1894             (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
1895              !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
1896                 goto enable_int;
1897         }
1898
1899         if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1900                 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1901                 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1902         }
1903
1904         if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1905                 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1906                 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1907         }
1908
1909         if (rx || tx) {
1910                 /* get the higher of the two ITR adjustments and
1911                  * use the same value for both ITR registers
1912                  * when in adaptive mode (Rx and/or Tx)
1913                  */
1914                 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1915
1916                 q_vector->tx.itr = q_vector->rx.itr = itr;
1917                 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1918                 tx = true;
1919                 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1920                 rx = true;
1921         }
1922
1923         /* only need to enable the interrupt once, but need
1924          * to possibly update both ITR values
1925          */
1926         if (rx) {
1927                 /* set the INTENA_MSK_MASK so that this first write
1928                  * won't actually enable the interrupt, instead just
1929                  * updating the ITR (it's bit 31 PF and VF)
1930                  */
1931                 rxval |= BIT(31);
1932                 /* don't check _DOWN because interrupt isn't being enabled */
1933                 wr32(hw, INTREG(vector - 1), rxval);
1934         }
1935
1936 enable_int:
1937         if (!test_bit(__I40E_DOWN, &vsi->state))
1938                 wr32(hw, INTREG(vector - 1), txval);
1939
1940         if (q_vector->itr_countdown)
1941                 q_vector->itr_countdown--;
1942         else
1943                 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1944 }
1945
1946 /**
1947  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1948  * @napi: napi struct with our devices info in it
1949  * @budget: amount of work driver is allowed to do this pass, in packets
1950  *
1951  * This function will clean all queues associated with a q_vector.
1952  *
1953  * Returns the amount of work done
1954  **/
1955 int i40e_napi_poll(struct napi_struct *napi, int budget)
1956 {
1957         struct i40e_q_vector *q_vector =
1958                                container_of(napi, struct i40e_q_vector, napi);
1959         struct i40e_vsi *vsi = q_vector->vsi;
1960         struct i40e_ring *ring;
1961         bool clean_complete = true;
1962         bool arm_wb = false;
1963         int budget_per_ring;
1964         int work_done = 0;
1965
1966         if (test_bit(__I40E_DOWN, &vsi->state)) {
1967                 napi_complete(napi);
1968                 return 0;
1969         }
1970
1971         /* Clear hung_detected bit */
1972         clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
1973         /* Since the actual Tx work is minimal, we can give the Tx a larger
1974          * budget and be more aggressive about cleaning up the Tx descriptors.
1975          */
1976         i40e_for_each_ring(ring, q_vector->tx) {
1977                 clean_complete = clean_complete &&
1978                                  i40e_clean_tx_irq(ring, vsi->work_limit);
1979                 arm_wb = arm_wb || ring->arm_wb;
1980                 ring->arm_wb = false;
1981         }
1982
1983         /* Handle case where we are called by netpoll with a budget of 0 */
1984         if (budget <= 0)
1985                 goto tx_only;
1986
1987         /* We attempt to distribute budget to each Rx queue fairly, but don't
1988          * allow the budget to go below 1 because that would exit polling early.
1989          */
1990         budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1991
1992         i40e_for_each_ring(ring, q_vector->rx) {
1993                 int cleaned;
1994
1995                 if (ring_is_ps_enabled(ring))
1996                         cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1997                 else
1998                         cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1999
2000                 work_done += cleaned;
2001                 /* if we didn't clean as many as budgeted, we must be done */
2002                 clean_complete = clean_complete && (budget_per_ring > cleaned);
2003         }
2004
2005         /* If work not completed, return budget and polling will return */
2006         if (!clean_complete) {
2007 tx_only:
2008                 if (arm_wb) {
2009                         q_vector->tx.ring[0].tx_stats.tx_force_wb++;
2010                         i40e_enable_wb_on_itr(vsi, q_vector);
2011                 }
2012                 return budget;
2013         }
2014
2015         if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2016                 q_vector->arm_wb_state = false;
2017
2018         /* Work is done so exit the polling mode and re-enable the interrupt */
2019         napi_complete_done(napi, work_done);
2020         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2021                 i40e_update_enable_itr(vsi, q_vector);
2022         } else { /* Legacy mode */
2023                 i40e_irq_dynamic_enable_icr0(vsi->back, false);
2024         }
2025         return 0;
2026 }
2027
2028 /**
2029  * i40e_atr - Add a Flow Director ATR filter
2030  * @tx_ring:  ring to add programming descriptor to
2031  * @skb:      send buffer
2032  * @tx_flags: send tx flags
2033  **/
2034 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
2035                      u32 tx_flags)
2036 {
2037         struct i40e_filter_program_desc *fdir_desc;
2038         struct i40e_pf *pf = tx_ring->vsi->back;
2039         union {
2040                 unsigned char *network;
2041                 struct iphdr *ipv4;
2042                 struct ipv6hdr *ipv6;
2043         } hdr;
2044         struct tcphdr *th;
2045         unsigned int hlen;
2046         u32 flex_ptype, dtype_cmd;
2047         int l4_proto;
2048         u16 i;
2049
2050         /* make sure ATR is enabled */
2051         if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
2052                 return;
2053
2054         if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2055                 return;
2056
2057         /* if sampling is disabled do nothing */
2058         if (!tx_ring->atr_sample_rate)
2059                 return;
2060
2061         /* Currently only IPv4/IPv6 with TCP is supported */
2062         if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2063                 return;
2064
2065         /* snag network header to get L4 type and address */
2066         hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2067                       skb_inner_network_header(skb) : skb_network_header(skb);
2068
2069         /* Note: tx_flags gets modified to reflect inner protocols in
2070          * tx_enable_csum function if encap is enabled.
2071          */
2072         if (tx_flags & I40E_TX_FLAGS_IPV4) {
2073                 /* access ihl as u8 to avoid unaligned access on ia64 */
2074                 hlen = (hdr.network[0] & 0x0F) << 2;
2075                 l4_proto = hdr.ipv4->protocol;
2076         } else {
2077                 hlen = hdr.network - skb->data;
2078                 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2079                 hlen -= hdr.network - skb->data;
2080         }
2081
2082         if (l4_proto != IPPROTO_TCP)
2083                 return;
2084
2085         th = (struct tcphdr *)(hdr.network + hlen);
2086
2087         /* Due to lack of space, no more new filters can be programmed */
2088         if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2089                 return;
2090         if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2091             (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
2092                 /* HW ATR eviction will take care of removing filters on FIN
2093                  * and RST packets.
2094                  */
2095                 if (th->fin || th->rst)
2096                         return;
2097         }
2098
2099         tx_ring->atr_count++;
2100
2101         /* sample on all syn/fin/rst packets or once every atr sample rate */
2102         if (!th->fin &&
2103             !th->syn &&
2104             !th->rst &&
2105             (tx_ring->atr_count < tx_ring->atr_sample_rate))
2106                 return;
2107
2108         tx_ring->atr_count = 0;
2109
2110         /* grab the next descriptor */
2111         i = tx_ring->next_to_use;
2112         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2113
2114         i++;
2115         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2116
2117         flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2118                       I40E_TXD_FLTR_QW0_QINDEX_MASK;
2119         flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
2120                       (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2121                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2122                       (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2123                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2124
2125         flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2126
2127         dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2128
2129         dtype_cmd |= (th->fin || th->rst) ?
2130                      (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2131                       I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2132                      (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2133                       I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2134
2135         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2136                      I40E_TXD_FLTR_QW1_DEST_SHIFT;
2137
2138         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2139                      I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2140
2141         dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2142         if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2143                 dtype_cmd |=
2144                         ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2145                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2146                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2147         else
2148                 dtype_cmd |=
2149                         ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2150                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2151                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2152
2153         if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2154             (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
2155                 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2156
2157         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2158         fdir_desc->rsvd = cpu_to_le32(0);
2159         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2160         fdir_desc->fd_id = cpu_to_le32(0);
2161 }
2162
2163 /**
2164  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2165  * @skb:     send buffer
2166  * @tx_ring: ring to send buffer on
2167  * @flags:   the tx flags to be set
2168  *
2169  * Checks the skb and set up correspondingly several generic transmit flags
2170  * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2171  *
2172  * Returns error code indicate the frame should be dropped upon error and the
2173  * otherwise  returns 0 to indicate the flags has been set properly.
2174  **/
2175 #ifdef I40E_FCOE
2176 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2177                                       struct i40e_ring *tx_ring,
2178                                       u32 *flags)
2179 #else
2180 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2181                                              struct i40e_ring *tx_ring,
2182                                              u32 *flags)
2183 #endif
2184 {
2185         __be16 protocol = skb->protocol;
2186         u32  tx_flags = 0;
2187
2188         if (protocol == htons(ETH_P_8021Q) &&
2189             !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2190                 /* When HW VLAN acceleration is turned off by the user the
2191                  * stack sets the protocol to 8021q so that the driver
2192                  * can take any steps required to support the SW only
2193                  * VLAN handling.  In our case the driver doesn't need
2194                  * to take any further steps so just set the protocol
2195                  * to the encapsulated ethertype.
2196                  */
2197                 skb->protocol = vlan_get_protocol(skb);
2198                 goto out;
2199         }
2200
2201         /* if we have a HW VLAN tag being added, default to the HW one */
2202         if (skb_vlan_tag_present(skb)) {
2203                 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2204                 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2205         /* else if it is a SW VLAN, check the next protocol and store the tag */
2206         } else if (protocol == htons(ETH_P_8021Q)) {
2207                 struct vlan_hdr *vhdr, _vhdr;
2208
2209                 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2210                 if (!vhdr)
2211                         return -EINVAL;
2212
2213                 protocol = vhdr->h_vlan_encapsulated_proto;
2214                 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2215                 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2216         }
2217
2218         if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2219                 goto out;
2220
2221         /* Insert 802.1p priority into VLAN header */
2222         if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2223             (skb->priority != TC_PRIO_CONTROL)) {
2224                 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2225                 tx_flags |= (skb->priority & 0x7) <<
2226                                 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2227                 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2228                         struct vlan_ethhdr *vhdr;
2229                         int rc;
2230
2231                         rc = skb_cow_head(skb, 0);
2232                         if (rc < 0)
2233                                 return rc;
2234                         vhdr = (struct vlan_ethhdr *)skb->data;
2235                         vhdr->h_vlan_TCI = htons(tx_flags >>
2236                                                  I40E_TX_FLAGS_VLAN_SHIFT);
2237                 } else {
2238                         tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2239                 }
2240         }
2241
2242 out:
2243         *flags = tx_flags;
2244         return 0;
2245 }
2246
2247 /**
2248  * i40e_tso - set up the tso context descriptor
2249  * @tx_ring:  ptr to the ring to send
2250  * @skb:      ptr to the skb we're sending
2251  * @hdr_len:  ptr to the size of the packet header
2252  * @cd_type_cmd_tso_mss: Quad Word 1
2253  *
2254  * Returns 0 if no TSO can happen, 1 if tso is going, or error
2255  **/
2256 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2257                     u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
2258 {
2259         u64 cd_cmd, cd_tso_len, cd_mss;
2260         union {
2261                 struct iphdr *v4;
2262                 struct ipv6hdr *v6;
2263                 unsigned char *hdr;
2264         } ip;
2265         union {
2266                 struct tcphdr *tcp;
2267                 struct udphdr *udp;
2268                 unsigned char *hdr;
2269         } l4;
2270         u32 paylen, l4_offset;
2271         int err;
2272
2273         if (skb->ip_summed != CHECKSUM_PARTIAL)
2274                 return 0;
2275
2276         if (!skb_is_gso(skb))
2277                 return 0;
2278
2279         err = skb_cow_head(skb, 0);
2280         if (err < 0)
2281                 return err;
2282
2283         ip.hdr = skb_network_header(skb);
2284         l4.hdr = skb_transport_header(skb);
2285
2286         /* initialize outer IP header fields */
2287         if (ip.v4->version == 4) {
2288                 ip.v4->tot_len = 0;
2289                 ip.v4->check = 0;
2290         } else {
2291                 ip.v6->payload_len = 0;
2292         }
2293
2294         if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
2295                                          SKB_GSO_UDP_TUNNEL_CSUM)) {
2296                 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
2297                         /* determine offset of outer transport header */
2298                         l4_offset = l4.hdr - skb->data;
2299
2300                         /* remove payload length from outer checksum */
2301                         paylen = (__force u16)l4.udp->check;
2302                         paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
2303                         l4.udp->check = ~csum_fold((__force __wsum)paylen);
2304                 }
2305
2306                 /* reset pointers to inner headers */
2307                 ip.hdr = skb_inner_network_header(skb);
2308                 l4.hdr = skb_inner_transport_header(skb);
2309
2310                 /* initialize inner IP header fields */
2311                 if (ip.v4->version == 4) {
2312                         ip.v4->tot_len = 0;
2313                         ip.v4->check = 0;
2314                 } else {
2315                         ip.v6->payload_len = 0;
2316                 }
2317         }
2318
2319         /* determine offset of inner transport header */
2320         l4_offset = l4.hdr - skb->data;
2321
2322         /* remove payload length from inner checksum */
2323         paylen = (__force u16)l4.tcp->check;
2324         paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
2325         l4.tcp->check = ~csum_fold((__force __wsum)paylen);
2326
2327         /* compute length of segmentation header */
2328         *hdr_len = (l4.tcp->doff * 4) + l4_offset;
2329
2330         /* find the field values */
2331         cd_cmd = I40E_TX_CTX_DESC_TSO;
2332         cd_tso_len = skb->len - *hdr_len;
2333         cd_mss = skb_shinfo(skb)->gso_size;
2334         *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2335                                 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2336                                 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2337         return 1;
2338 }
2339
2340 /**
2341  * i40e_tsyn - set up the tsyn context descriptor
2342  * @tx_ring:  ptr to the ring to send
2343  * @skb:      ptr to the skb we're sending
2344  * @tx_flags: the collected send information
2345  * @cd_type_cmd_tso_mss: Quad Word 1
2346  *
2347  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2348  **/
2349 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2350                      u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2351 {
2352         struct i40e_pf *pf;
2353
2354         if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2355                 return 0;
2356
2357         /* Tx timestamps cannot be sampled when doing TSO */
2358         if (tx_flags & I40E_TX_FLAGS_TSO)
2359                 return 0;
2360
2361         /* only timestamp the outbound packet if the user has requested it and
2362          * we are not already transmitting a packet to be timestamped
2363          */
2364         pf = i40e_netdev_to_pf(tx_ring->netdev);
2365         if (!(pf->flags & I40E_FLAG_PTP))
2366                 return 0;
2367
2368         if (pf->ptp_tx &&
2369             !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2370                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2371                 pf->ptp_tx_skb = skb_get(skb);
2372         } else {
2373                 return 0;
2374         }
2375
2376         *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2377                                 I40E_TXD_CTX_QW1_CMD_SHIFT;
2378
2379         return 1;
2380 }
2381
2382 /**
2383  * i40e_tx_enable_csum - Enable Tx checksum offloads
2384  * @skb: send buffer
2385  * @tx_flags: pointer to Tx flags currently set
2386  * @td_cmd: Tx descriptor command bits to set
2387  * @td_offset: Tx descriptor header offsets to set
2388  * @tx_ring: Tx descriptor ring
2389  * @cd_tunneling: ptr to context desc bits
2390  **/
2391 static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2392                                u32 *td_cmd, u32 *td_offset,
2393                                struct i40e_ring *tx_ring,
2394                                u32 *cd_tunneling)
2395 {
2396         union {
2397                 struct iphdr *v4;
2398                 struct ipv6hdr *v6;
2399                 unsigned char *hdr;
2400         } ip;
2401         union {
2402                 struct tcphdr *tcp;
2403                 struct udphdr *udp;
2404                 unsigned char *hdr;
2405         } l4;
2406         unsigned char *exthdr;
2407         u32 offset, cmd = 0, tunnel = 0;
2408         __be16 frag_off;
2409         u8 l4_proto = 0;
2410
2411         if (skb->ip_summed != CHECKSUM_PARTIAL)
2412                 return 0;
2413
2414         ip.hdr = skb_network_header(skb);
2415         l4.hdr = skb_transport_header(skb);
2416
2417         /* compute outer L2 header size */
2418         offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2419
2420         if (skb->encapsulation) {
2421                 /* define outer network header type */
2422                 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2423                         tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2424                                   I40E_TX_CTX_EXT_IP_IPV4 :
2425                                   I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2426
2427                         l4_proto = ip.v4->protocol;
2428                 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2429                         tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
2430
2431                         exthdr = ip.hdr + sizeof(*ip.v6);
2432                         l4_proto = ip.v6->nexthdr;
2433                         if (l4.hdr != exthdr)
2434                                 ipv6_skip_exthdr(skb, exthdr - skb->data,
2435                                                  &l4_proto, &frag_off);
2436                 }
2437
2438                 /* compute outer L3 header size */
2439                 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2440                           I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2441
2442                 /* switch IP header pointer from outer to inner header */
2443                 ip.hdr = skb_inner_network_header(skb);
2444
2445                 /* define outer transport */
2446                 switch (l4_proto) {
2447                 case IPPROTO_UDP:
2448                         tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
2449                         *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2450                         break;
2451                 case IPPROTO_GRE:
2452                         tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
2453                         *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2454                         break;
2455                 default:
2456                         if (*tx_flags & I40E_TX_FLAGS_TSO)
2457                                 return -1;
2458
2459                         skb_checksum_help(skb);
2460                         return 0;
2461                 }
2462
2463                 /* compute tunnel header size */
2464                 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2465                           I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2466
2467                 /* indicate if we need to offload outer UDP header */
2468                 if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
2469                     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2470                         tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2471
2472                 /* record tunnel offload values */
2473                 *cd_tunneling |= tunnel;
2474
2475                 /* switch L4 header pointer from outer to inner */
2476                 l4.hdr = skb_inner_transport_header(skb);
2477                 l4_proto = 0;
2478
2479                 /* reset type as we transition from outer to inner headers */
2480                 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2481                 if (ip.v4->version == 4)
2482                         *tx_flags |= I40E_TX_FLAGS_IPV4;
2483                 if (ip.v6->version == 6)
2484                         *tx_flags |= I40E_TX_FLAGS_IPV6;
2485         }
2486
2487         /* Enable IP checksum offloads */
2488         if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2489                 l4_proto = ip.v4->protocol;
2490                 /* the stack computes the IP header already, the only time we
2491                  * need the hardware to recompute it is in the case of TSO.
2492                  */
2493                 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2494                        I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2495                        I40E_TX_DESC_CMD_IIPT_IPV4;
2496         } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2497                 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2498
2499                 exthdr = ip.hdr + sizeof(*ip.v6);
2500                 l4_proto = ip.v6->nexthdr;
2501                 if (l4.hdr != exthdr)
2502                         ipv6_skip_exthdr(skb, exthdr - skb->data,
2503                                          &l4_proto, &frag_off);
2504         }
2505
2506         /* compute inner L3 header size */
2507         offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2508
2509         /* Enable L4 checksum offloads */
2510         switch (l4_proto) {
2511         case IPPROTO_TCP:
2512                 /* enable checksum offloads */
2513                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2514                 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2515                 break;
2516         case IPPROTO_SCTP:
2517                 /* enable SCTP checksum offload */
2518                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2519                 offset |= (sizeof(struct sctphdr) >> 2) <<
2520                           I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2521                 break;
2522         case IPPROTO_UDP:
2523                 /* enable UDP checksum offload */
2524                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2525                 offset |= (sizeof(struct udphdr) >> 2) <<
2526                           I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2527                 break;
2528         default:
2529                 if (*tx_flags & I40E_TX_FLAGS_TSO)
2530                         return -1;
2531                 skb_checksum_help(skb);
2532                 return 0;
2533         }
2534
2535         *td_cmd |= cmd;
2536         *td_offset |= offset;
2537
2538         return 1;
2539 }
2540
2541 /**
2542  * i40e_create_tx_ctx Build the Tx context descriptor
2543  * @tx_ring:  ring to create the descriptor on
2544  * @cd_type_cmd_tso_mss: Quad Word 1
2545  * @cd_tunneling: Quad Word 0 - bits 0-31
2546  * @cd_l2tag2: Quad Word 0 - bits 32-63
2547  **/
2548 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2549                                const u64 cd_type_cmd_tso_mss,
2550                                const u32 cd_tunneling, const u32 cd_l2tag2)
2551 {
2552         struct i40e_tx_context_desc *context_desc;
2553         int i = tx_ring->next_to_use;
2554
2555         if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2556             !cd_tunneling && !cd_l2tag2)
2557                 return;
2558
2559         /* grab the next descriptor */
2560         context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2561
2562         i++;
2563         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2564
2565         /* cpu_to_le32 and assign to struct fields */
2566         context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2567         context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2568         context_desc->rsvd = cpu_to_le16(0);
2569         context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2570 }
2571
2572 /**
2573  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2574  * @tx_ring: the ring to be checked
2575  * @size:    the size buffer we want to assure is available
2576  *
2577  * Returns -EBUSY if a stop is needed, else 0
2578  **/
2579 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2580 {
2581         netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2582         /* Memory barrier before checking head and tail */
2583         smp_mb();
2584
2585         /* Check again in a case another CPU has just made room available. */
2586         if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2587                 return -EBUSY;
2588
2589         /* A reprieve! - use start_queue because it doesn't call schedule */
2590         netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2591         ++tx_ring->tx_stats.restart_queue;
2592         return 0;
2593 }
2594
2595 /**
2596  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2597  * @skb:      send buffer
2598  * @tx_flags: collected send information
2599  *
2600  * Note: Our HW can't scatter-gather more than 8 fragments to build
2601  * a packet on the wire and so we need to figure out the cases where we
2602  * need to linearize the skb.
2603  **/
2604 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2605 {
2606         struct skb_frag_struct *frag;
2607         bool linearize = false;
2608         unsigned int size = 0;
2609         u16 num_frags;
2610         u16 gso_segs;
2611
2612         num_frags = skb_shinfo(skb)->nr_frags;
2613         gso_segs = skb_shinfo(skb)->gso_segs;
2614
2615         if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2616                 u16 j = 0;
2617
2618                 if (num_frags < (I40E_MAX_BUFFER_TXD))
2619                         goto linearize_chk_done;
2620                 /* try the simple math, if we have too many frags per segment */
2621                 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2622                     I40E_MAX_BUFFER_TXD) {
2623                         linearize = true;
2624                         goto linearize_chk_done;
2625                 }
2626                 frag = &skb_shinfo(skb)->frags[0];
2627                 /* we might still have more fragments per segment */
2628                 do {
2629                         size += skb_frag_size(frag);
2630                         frag++; j++;
2631                         if ((size >= skb_shinfo(skb)->gso_size) &&
2632                             (j < I40E_MAX_BUFFER_TXD)) {
2633                                 size = (size % skb_shinfo(skb)->gso_size);
2634                                 j = (size) ? 1 : 0;
2635                         }
2636                         if (j == I40E_MAX_BUFFER_TXD) {
2637                                 linearize = true;
2638                                 break;
2639                         }
2640                         num_frags--;
2641                 } while (num_frags);
2642         } else {
2643                 if (num_frags >= I40E_MAX_BUFFER_TXD)
2644                         linearize = true;
2645         }
2646
2647 linearize_chk_done:
2648         return linearize;
2649 }
2650
2651 /**
2652  * i40e_tx_map - Build the Tx descriptor
2653  * @tx_ring:  ring to send buffer on
2654  * @skb:      send buffer
2655  * @first:    first buffer info buffer to use
2656  * @tx_flags: collected send information
2657  * @hdr_len:  size of the packet header
2658  * @td_cmd:   the command field in the descriptor
2659  * @td_offset: offset for checksum or crc
2660  **/
2661 #ifdef I40E_FCOE
2662 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2663                         struct i40e_tx_buffer *first, u32 tx_flags,
2664                         const u8 hdr_len, u32 td_cmd, u32 td_offset)
2665 #else
2666 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2667                                struct i40e_tx_buffer *first, u32 tx_flags,
2668                                const u8 hdr_len, u32 td_cmd, u32 td_offset)
2669 #endif
2670 {
2671         unsigned int data_len = skb->data_len;
2672         unsigned int size = skb_headlen(skb);
2673         struct skb_frag_struct *frag;
2674         struct i40e_tx_buffer *tx_bi;
2675         struct i40e_tx_desc *tx_desc;
2676         u16 i = tx_ring->next_to_use;
2677         u32 td_tag = 0;
2678         dma_addr_t dma;
2679         u16 gso_segs;
2680         u16 desc_count = 0;
2681         bool tail_bump = true;
2682         bool do_rs = false;
2683
2684         if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2685                 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2686                 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2687                          I40E_TX_FLAGS_VLAN_SHIFT;
2688         }
2689
2690         if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2691                 gso_segs = skb_shinfo(skb)->gso_segs;
2692         else
2693                 gso_segs = 1;
2694
2695         /* multiply data chunks by size of headers */
2696         first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2697         first->gso_segs = gso_segs;
2698         first->skb = skb;
2699         first->tx_flags = tx_flags;
2700
2701         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2702
2703         tx_desc = I40E_TX_DESC(tx_ring, i);
2704         tx_bi = first;
2705
2706         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2707                 if (dma_mapping_error(tx_ring->dev, dma))
2708                         goto dma_error;
2709
2710                 /* record length, and DMA address */
2711                 dma_unmap_len_set(tx_bi, len, size);
2712                 dma_unmap_addr_set(tx_bi, dma, dma);
2713
2714                 tx_desc->buffer_addr = cpu_to_le64(dma);
2715
2716                 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2717                         tx_desc->cmd_type_offset_bsz =
2718                                 build_ctob(td_cmd, td_offset,
2719                                            I40E_MAX_DATA_PER_TXD, td_tag);
2720
2721                         tx_desc++;
2722                         i++;
2723                         desc_count++;
2724
2725                         if (i == tx_ring->count) {
2726                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
2727                                 i = 0;
2728                         }
2729
2730                         dma += I40E_MAX_DATA_PER_TXD;
2731                         size -= I40E_MAX_DATA_PER_TXD;
2732
2733                         tx_desc->buffer_addr = cpu_to_le64(dma);
2734                 }
2735
2736                 if (likely(!data_len))
2737                         break;
2738
2739                 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2740                                                           size, td_tag);
2741
2742                 tx_desc++;
2743                 i++;
2744                 desc_count++;
2745
2746                 if (i == tx_ring->count) {
2747                         tx_desc = I40E_TX_DESC(tx_ring, 0);
2748                         i = 0;
2749                 }
2750
2751                 size = skb_frag_size(frag);
2752                 data_len -= size;
2753
2754                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2755                                        DMA_TO_DEVICE);
2756
2757                 tx_bi = &tx_ring->tx_bi[i];
2758         }
2759
2760         /* set next_to_watch value indicating a packet is present */
2761         first->next_to_watch = tx_desc;
2762
2763         i++;
2764         if (i == tx_ring->count)
2765                 i = 0;
2766
2767         tx_ring->next_to_use = i;
2768
2769         netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2770                                                  tx_ring->queue_index),
2771                                                  first->bytecount);
2772         i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2773
2774         /* Algorithm to optimize tail and RS bit setting:
2775          * if xmit_more is supported
2776          *      if xmit_more is true
2777          *              do not update tail and do not mark RS bit.
2778          *      if xmit_more is false and last xmit_more was false
2779          *              if every packet spanned less than 4 desc
2780          *                      then set RS bit on 4th packet and update tail
2781          *                      on every packet
2782          *              else
2783          *                      update tail and set RS bit on every packet.
2784          *      if xmit_more is false and last_xmit_more was true
2785          *              update tail and set RS bit.
2786          *
2787          * Optimization: wmb to be issued only in case of tail update.
2788          * Also optimize the Descriptor WB path for RS bit with the same
2789          * algorithm.
2790          *
2791          * Note: If there are less than 4 packets
2792          * pending and interrupts were disabled the service task will
2793          * trigger a force WB.
2794          */
2795         if (skb->xmit_more  &&
2796             !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2797                                                     tx_ring->queue_index))) {
2798                 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2799                 tail_bump = false;
2800         } else if (!skb->xmit_more &&
2801                    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2802                                                        tx_ring->queue_index)) &&
2803                    (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2804                    (tx_ring->packet_stride < WB_STRIDE) &&
2805                    (desc_count < WB_STRIDE)) {
2806                 tx_ring->packet_stride++;
2807         } else {
2808                 tx_ring->packet_stride = 0;
2809                 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2810                 do_rs = true;
2811         }
2812         if (do_rs)
2813                 tx_ring->packet_stride = 0;
2814
2815         tx_desc->cmd_type_offset_bsz =
2816                         build_ctob(td_cmd, td_offset, size, td_tag) |
2817                         cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2818                                                   I40E_TX_DESC_CMD_EOP) <<
2819                                                   I40E_TXD_QW1_CMD_SHIFT);
2820
2821         /* notify HW of packet */
2822         if (!tail_bump)
2823                 prefetchw(tx_desc + 1);
2824
2825         if (tail_bump) {
2826                 /* Force memory writes to complete before letting h/w
2827                  * know there are new descriptors to fetch.  (Only
2828                  * applicable for weak-ordered memory model archs,
2829                  * such as IA-64).
2830                  */
2831                 wmb();
2832                 writel(i, tx_ring->tail);
2833         }
2834
2835         return;
2836
2837 dma_error:
2838         dev_info(tx_ring->dev, "TX DMA map failed\n");
2839
2840         /* clear dma mappings for failed tx_bi map */
2841         for (;;) {
2842                 tx_bi = &tx_ring->tx_bi[i];
2843                 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2844                 if (tx_bi == first)
2845                         break;
2846                 if (i == 0)
2847                         i = tx_ring->count;
2848                 i--;
2849         }
2850
2851         tx_ring->next_to_use = i;
2852 }
2853
2854 /**
2855  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2856  * @skb:     send buffer
2857  * @tx_ring: ring to send buffer on
2858  *
2859  * Returns NETDEV_TX_OK if sent, else an error code
2860  **/
2861 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2862                                         struct i40e_ring *tx_ring)
2863 {
2864         u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2865         u32 cd_tunneling = 0, cd_l2tag2 = 0;
2866         struct i40e_tx_buffer *first;
2867         u32 td_offset = 0;
2868         u32 tx_flags = 0;
2869         __be16 protocol;
2870         u32 td_cmd = 0;
2871         u8 hdr_len = 0;
2872         int tso, count;
2873         int tsyn;
2874
2875         /* prefetch the data, we'll need it later */
2876         prefetch(skb->data);
2877
2878         count = i40e_xmit_descriptor_count(skb);
2879
2880         /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2881          *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2882          *       + 4 desc gap to avoid the cache line where head is,
2883          *       + 1 desc for context descriptor,
2884          * otherwise try next time
2885          */
2886         if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2887                 tx_ring->tx_stats.tx_busy++;
2888                 return NETDEV_TX_BUSY;
2889         }
2890
2891         /* prepare the xmit flags */
2892         if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2893                 goto out_drop;
2894
2895         /* obtain protocol of skb */
2896         protocol = vlan_get_protocol(skb);
2897
2898         /* record the location of the first descriptor for this packet */
2899         first = &tx_ring->tx_bi[tx_ring->next_to_use];
2900
2901         /* setup IPv4/IPv6 offloads */
2902         if (protocol == htons(ETH_P_IP))
2903                 tx_flags |= I40E_TX_FLAGS_IPV4;
2904         else if (protocol == htons(ETH_P_IPV6))
2905                 tx_flags |= I40E_TX_FLAGS_IPV6;
2906
2907         tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
2908
2909         if (tso < 0)
2910                 goto out_drop;
2911         else if (tso)
2912                 tx_flags |= I40E_TX_FLAGS_TSO;
2913
2914         tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2915
2916         if (tsyn)
2917                 tx_flags |= I40E_TX_FLAGS_TSYN;
2918
2919         if (i40e_chk_linearize(skb, tx_flags)) {
2920                 if (skb_linearize(skb))
2921                         goto out_drop;
2922                 tx_ring->tx_stats.tx_linearize++;
2923         }
2924         skb_tx_timestamp(skb);
2925
2926         /* always enable CRC insertion offload */
2927         td_cmd |= I40E_TX_DESC_CMD_ICRC;
2928
2929         /* Always offload the checksum, since it's in the data descriptor */
2930         tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2931                                   tx_ring, &cd_tunneling);
2932         if (tso < 0)
2933                 goto out_drop;
2934
2935         i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2936                            cd_tunneling, cd_l2tag2);
2937
2938         /* Add Flow Director ATR if it's enabled.
2939          *
2940          * NOTE: this must always be directly before the data descriptor.
2941          */
2942         i40e_atr(tx_ring, skb, tx_flags);
2943
2944         i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2945                     td_cmd, td_offset);
2946
2947         return NETDEV_TX_OK;
2948
2949 out_drop:
2950         dev_kfree_skb_any(skb);
2951         return NETDEV_TX_OK;
2952 }
2953
2954 /**
2955  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2956  * @skb:    send buffer
2957  * @netdev: network interface device structure
2958  *
2959  * Returns NETDEV_TX_OK if sent, else an error code
2960  **/
2961 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2962 {
2963         struct i40e_netdev_priv *np = netdev_priv(netdev);
2964         struct i40e_vsi *vsi = np->vsi;
2965         struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2966
2967         /* hardware can't handle really short frames, hardware padding works
2968          * beyond this point
2969          */
2970         if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2971                 return NETDEV_TX_OK;
2972
2973         return i40e_xmit_frame_ring(skb, tx_ring);
2974 }