hv_netvsc: introduce {net, hv}_device_to_netvsc_device() helpers
[cascardo/linux.git] / drivers / net / hyperv / netvsc.c
1 /*
2  * Copyright (c) 2009, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, see <http://www.gnu.org/licenses/>.
15  *
16  * Authors:
17  *   Haiyang Zhang <haiyangz@microsoft.com>
18  *   Hank Janssen  <hjanssen@microsoft.com>
19  */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22 #include <linux/kernel.h>
23 #include <linux/sched.h>
24 #include <linux/wait.h>
25 #include <linux/mm.h>
26 #include <linux/delay.h>
27 #include <linux/io.h>
28 #include <linux/slab.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_ether.h>
31 #include <linux/vmalloc.h>
32 #include <asm/sync_bitops.h>
33
34 #include "hyperv_net.h"
35
36 /*
37  * Switch the data path from the synthetic interface to the VF
38  * interface.
39  */
40 void netvsc_switch_datapath(struct net_device *ndev, bool vf)
41 {
42         struct net_device_context *net_device_ctx = netdev_priv(ndev);
43         struct hv_device *dev = net_device_ctx->device_ctx;
44         struct netvsc_device *nv_dev = net_device_ctx->nvdev;
45         struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
46
47         memset(init_pkt, 0, sizeof(struct nvsp_message));
48         init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
49         if (vf)
50                 init_pkt->msg.v4_msg.active_dp.active_datapath =
51                         NVSP_DATAPATH_VF;
52         else
53                 init_pkt->msg.v4_msg.active_dp.active_datapath =
54                         NVSP_DATAPATH_SYNTHETIC;
55
56         vmbus_sendpacket(dev->channel, init_pkt,
57                                sizeof(struct nvsp_message),
58                                (unsigned long)init_pkt,
59                                VM_PKT_DATA_INBAND, 0);
60 }
61
62
63 static struct netvsc_device *alloc_net_device(void)
64 {
65         struct netvsc_device *net_device;
66
67         net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
68         if (!net_device)
69                 return NULL;
70
71         net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
72         if (!net_device->cb_buffer) {
73                 kfree(net_device);
74                 return NULL;
75         }
76
77         init_waitqueue_head(&net_device->wait_drain);
78         net_device->destroy = false;
79         atomic_set(&net_device->open_cnt, 0);
80         atomic_set(&net_device->vf_use_cnt, 0);
81         net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
82         net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
83
84         net_device->vf_netdev = NULL;
85         net_device->vf_inject = false;
86
87         return net_device;
88 }
89
90 static void free_netvsc_device(struct netvsc_device *nvdev)
91 {
92         kfree(nvdev->cb_buffer);
93         kfree(nvdev);
94 }
95
96 static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
97 {
98         struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
99
100         if (net_device && net_device->destroy)
101                 net_device = NULL;
102
103         return net_device;
104 }
105
106 static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
107 {
108         struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
109
110         if (!net_device)
111                 goto get_in_err;
112
113         if (net_device->destroy &&
114                 atomic_read(&net_device->num_outstanding_sends) == 0)
115                 net_device = NULL;
116
117 get_in_err:
118         return net_device;
119 }
120
121
122 static int netvsc_destroy_buf(struct hv_device *device)
123 {
124         struct nvsp_message *revoke_packet;
125         int ret = 0;
126         struct net_device *ndev = hv_get_drvdata(device);
127         struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
128
129         /*
130          * If we got a section count, it means we received a
131          * SendReceiveBufferComplete msg (ie sent
132          * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
133          * to send a revoke msg here
134          */
135         if (net_device->recv_section_cnt) {
136                 /* Send the revoke receive buffer */
137                 revoke_packet = &net_device->revoke_packet;
138                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
139
140                 revoke_packet->hdr.msg_type =
141                         NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
142                 revoke_packet->msg.v1_msg.
143                 revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
144
145                 ret = vmbus_sendpacket(device->channel,
146                                        revoke_packet,
147                                        sizeof(struct nvsp_message),
148                                        (unsigned long)revoke_packet,
149                                        VM_PKT_DATA_INBAND, 0);
150                 /*
151                  * If we failed here, we might as well return and
152                  * have a leak rather than continue and a bugchk
153                  */
154                 if (ret != 0) {
155                         netdev_err(ndev, "unable to send "
156                                 "revoke receive buffer to netvsp\n");
157                         return ret;
158                 }
159         }
160
161         /* Teardown the gpadl on the vsp end */
162         if (net_device->recv_buf_gpadl_handle) {
163                 ret = vmbus_teardown_gpadl(device->channel,
164                                            net_device->recv_buf_gpadl_handle);
165
166                 /* If we failed here, we might as well return and have a leak
167                  * rather than continue and a bugchk
168                  */
169                 if (ret != 0) {
170                         netdev_err(ndev,
171                                    "unable to teardown receive buffer's gpadl\n");
172                         return ret;
173                 }
174                 net_device->recv_buf_gpadl_handle = 0;
175         }
176
177         if (net_device->recv_buf) {
178                 /* Free up the receive buffer */
179                 vfree(net_device->recv_buf);
180                 net_device->recv_buf = NULL;
181         }
182
183         if (net_device->recv_section) {
184                 net_device->recv_section_cnt = 0;
185                 kfree(net_device->recv_section);
186                 net_device->recv_section = NULL;
187         }
188
189         /* Deal with the send buffer we may have setup.
190          * If we got a  send section size, it means we received a
191          * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
192          * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
193          * to send a revoke msg here
194          */
195         if (net_device->send_section_size) {
196                 /* Send the revoke receive buffer */
197                 revoke_packet = &net_device->revoke_packet;
198                 memset(revoke_packet, 0, sizeof(struct nvsp_message));
199
200                 revoke_packet->hdr.msg_type =
201                         NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
202                 revoke_packet->msg.v1_msg.revoke_send_buf.id =
203                         NETVSC_SEND_BUFFER_ID;
204
205                 ret = vmbus_sendpacket(device->channel,
206                                        revoke_packet,
207                                        sizeof(struct nvsp_message),
208                                        (unsigned long)revoke_packet,
209                                        VM_PKT_DATA_INBAND, 0);
210                 /* If we failed here, we might as well return and
211                  * have a leak rather than continue and a bugchk
212                  */
213                 if (ret != 0) {
214                         netdev_err(ndev, "unable to send "
215                                    "revoke send buffer to netvsp\n");
216                         return ret;
217                 }
218         }
219         /* Teardown the gpadl on the vsp end */
220         if (net_device->send_buf_gpadl_handle) {
221                 ret = vmbus_teardown_gpadl(device->channel,
222                                            net_device->send_buf_gpadl_handle);
223
224                 /* If we failed here, we might as well return and have a leak
225                  * rather than continue and a bugchk
226                  */
227                 if (ret != 0) {
228                         netdev_err(ndev,
229                                    "unable to teardown send buffer's gpadl\n");
230                         return ret;
231                 }
232                 net_device->send_buf_gpadl_handle = 0;
233         }
234         if (net_device->send_buf) {
235                 /* Free up the send buffer */
236                 vfree(net_device->send_buf);
237                 net_device->send_buf = NULL;
238         }
239         kfree(net_device->send_section_map);
240
241         return ret;
242 }
243
244 static int netvsc_init_buf(struct hv_device *device)
245 {
246         int ret = 0;
247         unsigned long t;
248         struct netvsc_device *net_device;
249         struct nvsp_message *init_packet;
250         struct net_device *ndev;
251         int node;
252
253         net_device = get_outbound_net_device(device);
254         if (!net_device)
255                 return -ENODEV;
256         ndev = hv_get_drvdata(device);
257
258         node = cpu_to_node(device->channel->target_cpu);
259         net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
260         if (!net_device->recv_buf)
261                 net_device->recv_buf = vzalloc(net_device->recv_buf_size);
262
263         if (!net_device->recv_buf) {
264                 netdev_err(ndev, "unable to allocate receive "
265                         "buffer of size %d\n", net_device->recv_buf_size);
266                 ret = -ENOMEM;
267                 goto cleanup;
268         }
269
270         /*
271          * Establish the gpadl handle for this buffer on this
272          * channel.  Note: This call uses the vmbus connection rather
273          * than the channel to establish the gpadl handle.
274          */
275         ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
276                                     net_device->recv_buf_size,
277                                     &net_device->recv_buf_gpadl_handle);
278         if (ret != 0) {
279                 netdev_err(ndev,
280                         "unable to establish receive buffer's gpadl\n");
281                 goto cleanup;
282         }
283
284
285         /* Notify the NetVsp of the gpadl handle */
286         init_packet = &net_device->channel_init_pkt;
287
288         memset(init_packet, 0, sizeof(struct nvsp_message));
289
290         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
291         init_packet->msg.v1_msg.send_recv_buf.
292                 gpadl_handle = net_device->recv_buf_gpadl_handle;
293         init_packet->msg.v1_msg.
294                 send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
295
296         /* Send the gpadl notification request */
297         ret = vmbus_sendpacket(device->channel, init_packet,
298                                sizeof(struct nvsp_message),
299                                (unsigned long)init_packet,
300                                VM_PKT_DATA_INBAND,
301                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
302         if (ret != 0) {
303                 netdev_err(ndev,
304                         "unable to send receive buffer's gpadl to netvsp\n");
305                 goto cleanup;
306         }
307
308         t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
309         BUG_ON(t == 0);
310
311
312         /* Check the response */
313         if (init_packet->msg.v1_msg.
314             send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
315                 netdev_err(ndev, "Unable to complete receive buffer "
316                            "initialization with NetVsp - status %d\n",
317                            init_packet->msg.v1_msg.
318                            send_recv_buf_complete.status);
319                 ret = -EINVAL;
320                 goto cleanup;
321         }
322
323         /* Parse the response */
324
325         net_device->recv_section_cnt = init_packet->msg.
326                 v1_msg.send_recv_buf_complete.num_sections;
327
328         net_device->recv_section = kmemdup(
329                 init_packet->msg.v1_msg.send_recv_buf_complete.sections,
330                 net_device->recv_section_cnt *
331                 sizeof(struct nvsp_1_receive_buffer_section),
332                 GFP_KERNEL);
333         if (net_device->recv_section == NULL) {
334                 ret = -EINVAL;
335                 goto cleanup;
336         }
337
338         /*
339          * For 1st release, there should only be 1 section that represents the
340          * entire receive buffer
341          */
342         if (net_device->recv_section_cnt != 1 ||
343             net_device->recv_section->offset != 0) {
344                 ret = -EINVAL;
345                 goto cleanup;
346         }
347
348         /* Now setup the send buffer.
349          */
350         net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
351         if (!net_device->send_buf)
352                 net_device->send_buf = vzalloc(net_device->send_buf_size);
353         if (!net_device->send_buf) {
354                 netdev_err(ndev, "unable to allocate send "
355                            "buffer of size %d\n", net_device->send_buf_size);
356                 ret = -ENOMEM;
357                 goto cleanup;
358         }
359
360         /* Establish the gpadl handle for this buffer on this
361          * channel.  Note: This call uses the vmbus connection rather
362          * than the channel to establish the gpadl handle.
363          */
364         ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
365                                     net_device->send_buf_size,
366                                     &net_device->send_buf_gpadl_handle);
367         if (ret != 0) {
368                 netdev_err(ndev,
369                            "unable to establish send buffer's gpadl\n");
370                 goto cleanup;
371         }
372
373         /* Notify the NetVsp of the gpadl handle */
374         init_packet = &net_device->channel_init_pkt;
375         memset(init_packet, 0, sizeof(struct nvsp_message));
376         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
377         init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
378                 net_device->send_buf_gpadl_handle;
379         init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
380
381         /* Send the gpadl notification request */
382         ret = vmbus_sendpacket(device->channel, init_packet,
383                                sizeof(struct nvsp_message),
384                                (unsigned long)init_packet,
385                                VM_PKT_DATA_INBAND,
386                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
387         if (ret != 0) {
388                 netdev_err(ndev,
389                            "unable to send send buffer's gpadl to netvsp\n");
390                 goto cleanup;
391         }
392
393         t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
394         BUG_ON(t == 0);
395
396         /* Check the response */
397         if (init_packet->msg.v1_msg.
398             send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
399                 netdev_err(ndev, "Unable to complete send buffer "
400                            "initialization with NetVsp - status %d\n",
401                            init_packet->msg.v1_msg.
402                            send_send_buf_complete.status);
403                 ret = -EINVAL;
404                 goto cleanup;
405         }
406
407         /* Parse the response */
408         net_device->send_section_size = init_packet->msg.
409                                 v1_msg.send_send_buf_complete.section_size;
410
411         /* Section count is simply the size divided by the section size.
412          */
413         net_device->send_section_cnt =
414                 net_device->send_buf_size/net_device->send_section_size;
415
416         dev_info(&device->device, "Send section size: %d, Section count:%d\n",
417                  net_device->send_section_size, net_device->send_section_cnt);
418
419         /* Setup state for managing the send buffer. */
420         net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
421                                              BITS_PER_LONG);
422
423         net_device->send_section_map =
424                 kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
425         if (net_device->send_section_map == NULL) {
426                 ret = -ENOMEM;
427                 goto cleanup;
428         }
429
430         goto exit;
431
432 cleanup:
433         netvsc_destroy_buf(device);
434
435 exit:
436         return ret;
437 }
438
439
440 /* Negotiate NVSP protocol version */
441 static int negotiate_nvsp_ver(struct hv_device *device,
442                               struct netvsc_device *net_device,
443                               struct nvsp_message *init_packet,
444                               u32 nvsp_ver)
445 {
446         struct net_device *ndev = hv_get_drvdata(device);
447         int ret;
448         unsigned long t;
449
450         memset(init_packet, 0, sizeof(struct nvsp_message));
451         init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
452         init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
453         init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
454
455         /* Send the init request */
456         ret = vmbus_sendpacket(device->channel, init_packet,
457                                sizeof(struct nvsp_message),
458                                (unsigned long)init_packet,
459                                VM_PKT_DATA_INBAND,
460                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
461
462         if (ret != 0)
463                 return ret;
464
465         t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
466
467         if (t == 0)
468                 return -ETIMEDOUT;
469
470         if (init_packet->msg.init_msg.init_complete.status !=
471             NVSP_STAT_SUCCESS)
472                 return -EINVAL;
473
474         if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
475                 return 0;
476
477         /* NVSPv2 or later: Send NDIS config */
478         memset(init_packet, 0, sizeof(struct nvsp_message));
479         init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
480         init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
481         init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
482
483         if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
484                 init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
485
486         ret = vmbus_sendpacket(device->channel, init_packet,
487                                 sizeof(struct nvsp_message),
488                                 (unsigned long)init_packet,
489                                 VM_PKT_DATA_INBAND, 0);
490
491         return ret;
492 }
493
494 static int netvsc_connect_vsp(struct hv_device *device)
495 {
496         int ret;
497         struct netvsc_device *net_device;
498         struct nvsp_message *init_packet;
499         int ndis_version;
500         u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
501                 NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
502         int i, num_ver = 4; /* number of different NVSP versions */
503
504         net_device = get_outbound_net_device(device);
505         if (!net_device)
506                 return -ENODEV;
507
508         init_packet = &net_device->channel_init_pkt;
509
510         /* Negotiate the latest NVSP protocol supported */
511         for (i = num_ver - 1; i >= 0; i--)
512                 if (negotiate_nvsp_ver(device, net_device, init_packet,
513                                        ver_list[i])  == 0) {
514                         net_device->nvsp_version = ver_list[i];
515                         break;
516                 }
517
518         if (i < 0) {
519                 ret = -EPROTO;
520                 goto cleanup;
521         }
522
523         pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
524
525         /* Send the ndis version */
526         memset(init_packet, 0, sizeof(struct nvsp_message));
527
528         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
529                 ndis_version = 0x00060001;
530         else
531                 ndis_version = 0x0006001e;
532
533         init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
534         init_packet->msg.v1_msg.
535                 send_ndis_ver.ndis_major_ver =
536                                 (ndis_version & 0xFFFF0000) >> 16;
537         init_packet->msg.v1_msg.
538                 send_ndis_ver.ndis_minor_ver =
539                                 ndis_version & 0xFFFF;
540
541         /* Send the init request */
542         ret = vmbus_sendpacket(device->channel, init_packet,
543                                 sizeof(struct nvsp_message),
544                                 (unsigned long)init_packet,
545                                 VM_PKT_DATA_INBAND, 0);
546         if (ret != 0)
547                 goto cleanup;
548
549         /* Post the big receive buffer to NetVSP */
550         if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
551                 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
552         else
553                 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
554         net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
555
556         ret = netvsc_init_buf(device);
557
558 cleanup:
559         return ret;
560 }
561
/*
 * netvsc_disconnect_vsp - counterpart of netvsc_connect_vsp()
 * @device: hv_device to disconnect
 *
 * Only releases the buffers; the status returned by netvsc_destroy_buf()
 * is discarded.
 */
static void netvsc_disconnect_vsp(struct hv_device *device)
{
        netvsc_destroy_buf(device);
}
566
567 /*
568  * netvsc_device_remove - Callback when the root bus device is removed
569  */
570 int netvsc_device_remove(struct hv_device *device)
571 {
572         struct net_device *ndev = hv_get_drvdata(device);
573         struct net_device_context *net_device_ctx = netdev_priv(ndev);
574         struct netvsc_device *net_device = net_device_ctx->nvdev;
575
576         netvsc_disconnect_vsp(device);
577
578         net_device_ctx->nvdev = NULL;
579
580         /*
581          * At this point, no one should be accessing net_device
582          * except in here
583          */
584         dev_notice(&device->device, "net device safe to remove\n");
585
586         /* Now, we can close the channel safely */
587         vmbus_close(device->channel);
588
589         /* Release all resources */
590         vfree(net_device->sub_cb_buf);
591         free_netvsc_device(net_device);
592         return 0;
593 }
594
595
596 #define RING_AVAIL_PERCENT_HIWATER 20
597 #define RING_AVAIL_PERCENT_LOWATER 10
598
599 /*
600  * Get the percentage of available bytes to write in the ring.
601  * The return value is in range from 0 to 100.
602  */
603 static inline u32 hv_ringbuf_avail_percent(
604                 struct hv_ring_buffer_info *ring_info)
605 {
606         u32 avail_read, avail_write;
607
608         hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
609
610         return avail_write * 100 / ring_info->ring_datasize;
611 }
612
613 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
614                                          u32 index)
615 {
616         sync_change_bit(index, net_device->send_section_map);
617 }
618
619 static void netvsc_send_completion(struct netvsc_device *net_device,
620                                    struct vmbus_channel *incoming_channel,
621                                    struct hv_device *device,
622                                    struct vmpacket_descriptor *packet)
623 {
624         struct nvsp_message *nvsp_packet;
625         struct hv_netvsc_packet *nvsc_packet;
626         struct net_device *ndev = hv_get_drvdata(device);
627         struct net_device_context *net_device_ctx = netdev_priv(ndev);
628         u32 send_index;
629         struct sk_buff *skb;
630
631         nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
632                         (packet->offset8 << 3));
633
634         if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
635             (nvsp_packet->hdr.msg_type ==
636              NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
637             (nvsp_packet->hdr.msg_type ==
638              NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
639             (nvsp_packet->hdr.msg_type ==
640              NVSP_MSG5_TYPE_SUBCHANNEL)) {
641                 /* Copy the response back */
642                 memcpy(&net_device->channel_init_pkt, nvsp_packet,
643                        sizeof(struct nvsp_message));
644                 complete(&net_device->channel_init_wait);
645         } else if (nvsp_packet->hdr.msg_type ==
646                    NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
647                 int num_outstanding_sends;
648                 u16 q_idx = 0;
649                 struct vmbus_channel *channel = device->channel;
650                 int queue_sends;
651
652                 /* Get the send context */
653                 skb = (struct sk_buff *)(unsigned long)packet->trans_id;
654
655                 /* Notify the layer above us */
656                 if (skb) {
657                         nvsc_packet = (struct hv_netvsc_packet *) skb->cb;
658                         send_index = nvsc_packet->send_buf_index;
659                         if (send_index != NETVSC_INVALID_INDEX)
660                                 netvsc_free_send_slot(net_device, send_index);
661                         q_idx = nvsc_packet->q_idx;
662                         channel = incoming_channel;
663                         dev_kfree_skb_any(skb);
664                 }
665
666                 num_outstanding_sends =
667                         atomic_dec_return(&net_device->num_outstanding_sends);
668                 queue_sends = atomic_dec_return(&net_device->
669                                                 queue_sends[q_idx]);
670
671                 if (net_device->destroy && num_outstanding_sends == 0)
672                         wake_up(&net_device->wait_drain);
673
674                 if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
675                     !net_device_ctx->start_remove &&
676                     (hv_ringbuf_avail_percent(&channel->outbound) >
677                      RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
678                                 netif_tx_wake_queue(netdev_get_tx_queue(
679                                                     ndev, q_idx));
680         } else {
681                 netdev_err(ndev, "Unknown send completion packet type- "
682                            "%d received!!\n", nvsp_packet->hdr.msg_type);
683         }
684
685 }
686
687 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
688 {
689         unsigned long index;
690         u32 max_words = net_device->map_words;
691         unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
692         u32 section_cnt = net_device->send_section_cnt;
693         int ret_val = NETVSC_INVALID_INDEX;
694         int i;
695         int prev_val;
696
697         for (i = 0; i < max_words; i++) {
698                 if (!~(map_addr[i]))
699                         continue;
700                 index = ffz(map_addr[i]);
701                 prev_val = sync_test_and_set_bit(index, &map_addr[i]);
702                 if (prev_val)
703                         continue;
704                 if ((index + (i * BITS_PER_LONG)) >= section_cnt)
705                         break;
706                 ret_val = (index + (i * BITS_PER_LONG));
707                 break;
708         }
709         return ret_val;
710 }
711
712 static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
713                                    unsigned int section_index,
714                                    u32 pend_size,
715                                    struct hv_netvsc_packet *packet,
716                                    struct rndis_message *rndis_msg,
717                                    struct hv_page_buffer **pb,
718                                    struct sk_buff *skb)
719 {
720         char *start = net_device->send_buf;
721         char *dest = start + (section_index * net_device->send_section_size)
722                      + pend_size;
723         int i;
724         bool is_data_pkt = (skb != NULL) ? true : false;
725         bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
726         u32 msg_size = 0;
727         u32 padding = 0;
728         u32 remain = packet->total_data_buflen % net_device->pkt_align;
729         u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
730                 packet->page_buf_cnt;
731
732         /* Add padding */
733         if (is_data_pkt && xmit_more && remain &&
734             !packet->cp_partial) {
735                 padding = net_device->pkt_align - remain;
736                 rndis_msg->msg_len += padding;
737                 packet->total_data_buflen += padding;
738         }
739
740         for (i = 0; i < page_count; i++) {
741                 char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
742                 u32 offset = (*pb)[i].offset;
743                 u32 len = (*pb)[i].len;
744
745                 memcpy(dest, (src + offset), len);
746                 msg_size += len;
747                 dest += len;
748         }
749
750         if (padding) {
751                 memset(dest, 0, padding);
752                 msg_size += padding;
753         }
754
755         return msg_size;
756 }
757
758 static inline int netvsc_send_pkt(
759         struct hv_device *device,
760         struct hv_netvsc_packet *packet,
761         struct netvsc_device *net_device,
762         struct hv_page_buffer **pb,
763         struct sk_buff *skb)
764 {
765         struct nvsp_message nvmsg;
766         u16 q_idx = packet->q_idx;
767         struct vmbus_channel *out_channel = net_device->chn_table[q_idx];
768         struct net_device *ndev = hv_get_drvdata(device);
769         u64 req_id;
770         int ret;
771         struct hv_page_buffer *pgbuf;
772         u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
773         bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
774
775         nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
776         if (skb != NULL) {
777                 /* 0 is RMC_DATA; */
778                 nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
779         } else {
780                 /* 1 is RMC_CONTROL; */
781                 nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
782         }
783
784         nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
785                 packet->send_buf_index;
786         if (packet->send_buf_index == NETVSC_INVALID_INDEX)
787                 nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
788         else
789                 nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
790                         packet->total_data_buflen;
791
792         req_id = (ulong)skb;
793
794         if (out_channel->rescind)
795                 return -ENODEV;
796
797         /*
798          * It is possible that once we successfully place this packet
799          * on the ringbuffer, we may stop the queue. In that case, we want
800          * to notify the host independent of the xmit_more flag. We don't
801          * need to be precise here; in the worst case we may signal the host
802          * unnecessarily.
803          */
804         if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1))
805                 xmit_more = false;
806
807         if (packet->page_buf_cnt) {
808                 pgbuf = packet->cp_partial ? (*pb) +
809                         packet->rmsg_pgcnt : (*pb);
810                 ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
811                                                       pgbuf,
812                                                       packet->page_buf_cnt,
813                                                       &nvmsg,
814                                                       sizeof(struct nvsp_message),
815                                                       req_id,
816                                                       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
817                                                       !xmit_more);
818         } else {
819                 ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
820                                            sizeof(struct nvsp_message),
821                                            req_id,
822                                            VM_PKT_DATA_INBAND,
823                                            VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
824                                            !xmit_more);
825         }
826
827         if (ret == 0) {
828                 atomic_inc(&net_device->num_outstanding_sends);
829                 atomic_inc(&net_device->queue_sends[q_idx]);
830
831                 if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
832                         netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx));
833
834                         if (atomic_read(&net_device->
835                                 queue_sends[q_idx]) < 1)
836                                 netif_tx_wake_queue(netdev_get_tx_queue(
837                                                     ndev, q_idx));
838                 }
839         } else if (ret == -EAGAIN) {
840                 netif_tx_stop_queue(netdev_get_tx_queue(
841                                     ndev, q_idx));
842                 if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
843                         netif_tx_wake_queue(netdev_get_tx_queue(
844                                             ndev, q_idx));
845                         ret = -ENOSPC;
846                 }
847         } else {
848                 netdev_err(ndev, "Unable to send packet %p ret %d\n",
849                            packet, ret);
850         }
851
852         return ret;
853 }
854
855 /* Move packet out of multi send data (msd), and clear msd */
856 static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
857                                 struct sk_buff **msd_skb,
858                                 struct multi_send_data *msdp)
859 {
860         *msd_skb = msdp->skb;
861         *msd_send = msdp->pkt;
862         msdp->skb = NULL;
863         msdp->pkt = NULL;
864         msdp->count = 0;
865 }
866
867 int netvsc_send(struct hv_device *device,
868                 struct hv_netvsc_packet *packet,
869                 struct rndis_message *rndis_msg,
870                 struct hv_page_buffer **pb,
871                 struct sk_buff *skb)
872 {
873         struct netvsc_device *net_device;
874         int ret = 0, m_ret = 0;
875         struct vmbus_channel *out_channel;
876         u16 q_idx = packet->q_idx;
877         u32 pktlen = packet->total_data_buflen, msd_len = 0;
878         unsigned int section_index = NETVSC_INVALID_INDEX;
879         struct multi_send_data *msdp;
880         struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
881         struct sk_buff *msd_skb = NULL;
882         bool try_batch;
883         bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
884
885         net_device = get_outbound_net_device(device);
886         if (!net_device)
887                 return -ENODEV;
888
889         out_channel = net_device->chn_table[q_idx];
890
891         packet->send_buf_index = NETVSC_INVALID_INDEX;
892         packet->cp_partial = false;
893
894         /* Send control message directly without accessing msd (Multi-Send
895          * Data) field which may be changed during data packet processing.
896          */
897         if (!skb) {
898                 cur_send = packet;
899                 goto send_now;
900         }
901
902         msdp = &net_device->msd[q_idx];
903
904         /* batch packets in send buffer if possible */
905         if (msdp->pkt)
906                 msd_len = msdp->pkt->total_data_buflen;
907
908         try_batch = (skb != NULL) && msd_len > 0 && msdp->count <
909                     net_device->max_pkt;
910
911         if (try_batch && msd_len + pktlen + net_device->pkt_align <
912             net_device->send_section_size) {
913                 section_index = msdp->pkt->send_buf_index;
914
915         } else if (try_batch && msd_len + packet->rmsg_size <
916                    net_device->send_section_size) {
917                 section_index = msdp->pkt->send_buf_index;
918                 packet->cp_partial = true;
919
920         } else if ((skb != NULL) && pktlen + net_device->pkt_align <
921                    net_device->send_section_size) {
922                 section_index = netvsc_get_next_send_section(net_device);
923                 if (section_index != NETVSC_INVALID_INDEX) {
924                         move_pkt_msd(&msd_send, &msd_skb, msdp);
925                         msd_len = 0;
926                 }
927         }
928
929         if (section_index != NETVSC_INVALID_INDEX) {
930                 netvsc_copy_to_send_buf(net_device,
931                                         section_index, msd_len,
932                                         packet, rndis_msg, pb, skb);
933
934                 packet->send_buf_index = section_index;
935
936                 if (packet->cp_partial) {
937                         packet->page_buf_cnt -= packet->rmsg_pgcnt;
938                         packet->total_data_buflen = msd_len + packet->rmsg_size;
939                 } else {
940                         packet->page_buf_cnt = 0;
941                         packet->total_data_buflen += msd_len;
942                 }
943
944                 if (msdp->skb)
945                         dev_kfree_skb_any(msdp->skb);
946
947                 if (xmit_more && !packet->cp_partial) {
948                         msdp->skb = skb;
949                         msdp->pkt = packet;
950                         msdp->count++;
951                 } else {
952                         cur_send = packet;
953                         msdp->skb = NULL;
954                         msdp->pkt = NULL;
955                         msdp->count = 0;
956                 }
957         } else {
958                 move_pkt_msd(&msd_send, &msd_skb, msdp);
959                 cur_send = packet;
960         }
961
962         if (msd_send) {
963                 m_ret = netvsc_send_pkt(device, msd_send, net_device,
964                                         NULL, msd_skb);
965
966                 if (m_ret != 0) {
967                         netvsc_free_send_slot(net_device,
968                                               msd_send->send_buf_index);
969                         dev_kfree_skb_any(msd_skb);
970                 }
971         }
972
973 send_now:
974         if (cur_send)
975                 ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
976
977         if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
978                 netvsc_free_send_slot(net_device, section_index);
979
980         return ret;
981 }
982
983 static void netvsc_send_recv_completion(struct hv_device *device,
984                                         struct vmbus_channel *channel,
985                                         struct netvsc_device *net_device,
986                                         u64 transaction_id, u32 status)
987 {
988         struct nvsp_message recvcompMessage;
989         int retries = 0;
990         int ret;
991         struct net_device *ndev = hv_get_drvdata(device);
992
993         recvcompMessage.hdr.msg_type =
994                                 NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
995
996         recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
997
998 retry_send_cmplt:
999         /* Send the completion */
1000         ret = vmbus_sendpacket(channel, &recvcompMessage,
1001                                sizeof(struct nvsp_message), transaction_id,
1002                                VM_PKT_COMP, 0);
1003         if (ret == 0) {
1004                 /* success */
1005                 /* no-op */
1006         } else if (ret == -EAGAIN) {
1007                 /* no more room...wait a bit and attempt to retry 3 times */
1008                 retries++;
1009                 netdev_err(ndev, "unable to send receive completion pkt"
1010                         " (tid %llx)...retrying %d\n", transaction_id, retries);
1011
1012                 if (retries < 4) {
1013                         udelay(100);
1014                         goto retry_send_cmplt;
1015                 } else {
1016                         netdev_err(ndev, "unable to send receive "
1017                                 "completion pkt (tid %llx)...give up retrying\n",
1018                                 transaction_id);
1019                 }
1020         } else {
1021                 netdev_err(ndev, "unable to send receive "
1022                         "completion pkt - %llx\n", transaction_id);
1023         }
1024 }
1025
1026 static void netvsc_receive(struct netvsc_device *net_device,
1027                         struct vmbus_channel *channel,
1028                         struct hv_device *device,
1029                         struct vmpacket_descriptor *packet)
1030 {
1031         struct vmtransfer_page_packet_header *vmxferpage_packet;
1032         struct nvsp_message *nvsp_packet;
1033         struct hv_netvsc_packet nv_pkt;
1034         struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
1035         u32 status = NVSP_STAT_SUCCESS;
1036         int i;
1037         int count = 0;
1038         struct net_device *ndev = hv_get_drvdata(device);
1039         void *data;
1040
1041         /*
1042          * All inbound packets other than send completion should be xfer page
1043          * packet
1044          */
1045         if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
1046                 netdev_err(ndev, "Unknown packet type received - %d\n",
1047                            packet->type);
1048                 return;
1049         }
1050
1051         nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
1052                         (packet->offset8 << 3));
1053
1054         /* Make sure this is a valid nvsp packet */
1055         if (nvsp_packet->hdr.msg_type !=
1056             NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
1057                 netdev_err(ndev, "Unknown nvsp packet type received-"
1058                         " %d\n", nvsp_packet->hdr.msg_type);
1059                 return;
1060         }
1061
1062         vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
1063
1064         if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
1065                 netdev_err(ndev, "Invalid xfer page set id - "
1066                            "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
1067                            vmxferpage_packet->xfer_pageset_id);
1068                 return;
1069         }
1070
1071         count = vmxferpage_packet->range_cnt;
1072
1073         /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1074         for (i = 0; i < count; i++) {
1075                 /* Initialize the netvsc packet */
1076                 data = (void *)((unsigned long)net_device->
1077                         recv_buf + vmxferpage_packet->ranges[i].byte_offset);
1078                 netvsc_packet->total_data_buflen =
1079                                         vmxferpage_packet->ranges[i].byte_count;
1080
1081                 /* Pass it to the upper layer */
1082                 status = rndis_filter_receive(device, netvsc_packet, &data,
1083                                               channel);
1084
1085         }
1086
1087         netvsc_send_recv_completion(device, channel, net_device,
1088                                     vmxferpage_packet->d.trans_id, status);
1089 }
1090
1091
1092 static void netvsc_send_table(struct hv_device *hdev,
1093                               struct nvsp_message *nvmsg)
1094 {
1095         struct netvsc_device *nvscdev;
1096         struct net_device *ndev = hv_get_drvdata(hdev);
1097         int i;
1098         u32 count, *tab;
1099
1100         nvscdev = get_outbound_net_device(hdev);
1101         if (!nvscdev)
1102                 return;
1103
1104         count = nvmsg->msg.v5_msg.send_table.count;
1105         if (count != VRSS_SEND_TAB_SIZE) {
1106                 netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1107                 return;
1108         }
1109
1110         tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
1111                       nvmsg->msg.v5_msg.send_table.offset);
1112
1113         for (i = 0; i < count; i++)
1114                 nvscdev->send_table[i] = tab[i];
1115 }
1116
1117 static void netvsc_send_vf(struct netvsc_device *nvdev,
1118                            struct nvsp_message *nvmsg)
1119 {
1120         nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1121         nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1122 }
1123
1124 static inline void netvsc_receive_inband(struct hv_device *hdev,
1125                                          struct netvsc_device *nvdev,
1126                                          struct nvsp_message *nvmsg)
1127 {
1128         switch (nvmsg->hdr.msg_type) {
1129         case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1130                 netvsc_send_table(hdev, nvmsg);
1131                 break;
1132
1133         case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1134                 netvsc_send_vf(nvdev, nvmsg);
1135                 break;
1136         }
1137 }
1138
1139 void netvsc_channel_cb(void *context)
1140 {
1141         int ret;
1142         struct vmbus_channel *channel = (struct vmbus_channel *)context;
1143         struct hv_device *device;
1144         struct netvsc_device *net_device;
1145         u32 bytes_recvd;
1146         u64 request_id;
1147         struct vmpacket_descriptor *desc;
1148         unsigned char *buffer;
1149         int bufferlen = NETVSC_PACKET_SIZE;
1150         struct net_device *ndev;
1151         struct nvsp_message *nvmsg;
1152
1153         if (channel->primary_channel != NULL)
1154                 device = channel->primary_channel->device_obj;
1155         else
1156                 device = channel->device_obj;
1157
1158         net_device = get_inbound_net_device(device);
1159         if (!net_device)
1160                 return;
1161         ndev = hv_get_drvdata(device);
1162         buffer = get_per_channel_state(channel);
1163
1164         do {
1165                 ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
1166                                            &bytes_recvd, &request_id);
1167                 if (ret == 0) {
1168                         if (bytes_recvd > 0) {
1169                                 desc = (struct vmpacket_descriptor *)buffer;
1170                                 nvmsg = (struct nvsp_message *)((unsigned long)
1171                                          desc + (desc->offset8 << 3));
1172                                 switch (desc->type) {
1173                                 case VM_PKT_COMP:
1174                                         netvsc_send_completion(net_device,
1175                                                                 channel,
1176                                                                 device, desc);
1177                                         break;
1178
1179                                 case VM_PKT_DATA_USING_XFER_PAGES:
1180                                         netvsc_receive(net_device, channel,
1181                                                        device, desc);
1182                                         break;
1183
1184                                 case VM_PKT_DATA_INBAND:
1185                                         netvsc_receive_inband(device,
1186                                                               net_device,
1187                                                               nvmsg);
1188                                         break;
1189
1190                                 default:
1191                                         netdev_err(ndev,
1192                                                    "unhandled packet type %d, "
1193                                                    "tid %llx len %d\n",
1194                                                    desc->type, request_id,
1195                                                    bytes_recvd);
1196                                         break;
1197                                 }
1198
1199                         } else {
1200                                 /*
1201                                  * We are done for this pass.
1202                                  */
1203                                 break;
1204                         }
1205
1206                 } else if (ret == -ENOBUFS) {
1207                         if (bufferlen > NETVSC_PACKET_SIZE)
1208                                 kfree(buffer);
1209                         /* Handle large packet */
1210                         buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
1211                         if (buffer == NULL) {
1212                                 /* Try again next time around */
1213                                 netdev_err(ndev,
1214                                            "unable to allocate buffer of size "
1215                                            "(%d)!!\n", bytes_recvd);
1216                                 break;
1217                         }
1218
1219                         bufferlen = bytes_recvd;
1220                 }
1221         } while (1);
1222
1223         if (bufferlen > NETVSC_PACKET_SIZE)
1224                 kfree(buffer);
1225         return;
1226 }
1227
1228 /*
1229  * netvsc_device_add - Callback when the device belonging to this
1230  * driver is added
1231  */
1232 int netvsc_device_add(struct hv_device *device, void *additional_info)
1233 {
1234         int i, ret = 0;
1235         int ring_size =
1236         ((struct netvsc_device_info *)additional_info)->ring_size;
1237         struct netvsc_device *net_device;
1238         struct net_device *ndev = hv_get_drvdata(device);
1239         struct net_device_context *net_device_ctx = netdev_priv(ndev);
1240
1241         net_device = alloc_net_device();
1242         if (!net_device)
1243                 return -ENOMEM;
1244
1245         net_device->ring_size = ring_size;
1246
1247         /* Initialize the NetVSC channel extension */
1248         init_completion(&net_device->channel_init_wait);
1249
1250         set_per_channel_state(device->channel, net_device->cb_buffer);
1251
1252         /* Open the channel */
1253         ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
1254                          ring_size * PAGE_SIZE, NULL, 0,
1255                          netvsc_channel_cb, device->channel);
1256
1257         if (ret != 0) {
1258                 netdev_err(ndev, "unable to open channel: %d\n", ret);
1259                 goto cleanup;
1260         }
1261
1262         /* Channel is opened */
1263         pr_info("hv_netvsc channel opened successfully\n");
1264
1265         /* If we're reopening the device we may have multiple queues, fill the
1266          * chn_table with the default channel to use it before subchannels are
1267          * opened.
1268          */
1269         for (i = 0; i < VRSS_CHANNEL_MAX; i++)
1270                 net_device->chn_table[i] = device->channel;
1271
1272         /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
1273          * populated.
1274          */
1275         wmb();
1276
1277         net_device_ctx->nvdev = net_device;
1278
1279         /* Connect with the NetVsp */
1280         ret = netvsc_connect_vsp(device);
1281         if (ret != 0) {
1282                 netdev_err(ndev,
1283                         "unable to connect to NetVSP - %d\n", ret);
1284                 goto close;
1285         }
1286
1287         return ret;
1288
1289 close:
1290         /* Now, we can close the channel safely */
1291         vmbus_close(device->channel);
1292
1293 cleanup:
1294         free_netvsc_device(net_device);
1295
1296         return ret;
1297 }