2 * Virtio PCI driver - modern (virtio 1.0) device support
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
8 * Copyright Red Hat, Inc. 2014
11 * Anthony Liguori <aliguori@us.ibm.com>
12 * Rusty Russell <rusty@rustcorp.com.au>
13 * Michael S. Tsirkin <mst@redhat.com>
15 * This work is licensed under the terms of the GNU GPL, version 2 or later.
16 * See the COPYING file in the top-level directory.
20 #define VIRTIO_PCI_NO_LEGACY
21 #include "virtio_pci_common.h"
24 * Type-safe wrappers for io accesses.
25 * Use these to enforce at compile time the following spec requirement:
27 * The driver MUST access each field using the “natural” access
28 * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
29 * for 16-bit fields and 8-bit accesses for 8-bit fields.
/*
 * 8-bit member of the typed I/O accessor family: takes a u8 __iomem
 * pointer and returns a u8.
 * NOTE(review): the body is not part of this listing. Presumably it
 * forwards to ioread8() just as the 16- and 32-bit readers forward to
 * ioread16()/ioread32() - confirm against the full file.
 */
31 static inline u8 vp_ioread8(u8 __iomem *addr)
/*
 * Read a 16-bit field. Taking a u16 __iomem pointer (rather than a void
 * pointer) lets the compiler flag a call made on a field of another
 * width. Returns the ioread16() result unchanged.
 */
35 static inline u16 vp_ioread16 (u16 __iomem *addr)
37 return ioread16(addr);
/*
 * Read a 32-bit field through a u32 __iomem pointer and return the
 * ioread32() result unchanged.
 */
40 static inline u32 vp_ioread32(u32 __iomem *addr)
42 return ioread32(addr);
/*
 * Write the byte @value to the 8-bit field at @addr via iowrite8().
 * Argument order is (value, address), the same as iowrite8().
 */
45 static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
47 iowrite8(value, addr);
/*
 * Write the 16-bit @value to the 16-bit field at @addr via iowrite16().
 * Argument order is (value, address), the same as iowrite16().
 */
50 static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
52 iowrite16(value, addr);
/*
 * Write the 32-bit @value to the 32-bit field at @addr via iowrite32().
 * Argument order is (value, address), the same as iowrite32().
 */
55 static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
57 iowrite32(value, addr);
/*
 * Store the 64-bit @val as two separate 32-bit writes: the low word goes
 * to @lo first, then the upper word (val >> 32) goes to @hi.
 * NOTE(review): @lo and @hi are declared __le32 __iomem * while
 * vp_iowrite32() is declared with a u32 __iomem * parameter; confirm the
 * differing annotations are intentional.
 */
60 static void vp_iowrite64_twopart(u64 val,
61 __le32 __iomem *lo, __le32 __iomem *hi)
63 vp_iowrite32((u32)val, lo);
64 vp_iowrite32(val >> 32, hi);
/*
 * map_capability - map the BAR window described by a virtio PCI capability.
 *
 * Reads one byte and the offset and length dwords of the
 * struct virtio_pci_cap found at config-space offset @off (the byte is
 * presumably the bar field - confirm), validates them, and maps the
 * window with pci_iomap_range().
 *
 * Checks visible here, each paired with its own diagnostic string:
 *   - length must be strictly greater than start;
 *   - length - start must be at least minlen;
 *   - start + offset must not wrap;
 *   - offset must have no bits set below align (a mask test, so align
 *     is presumably a power of two - confirm);
 *   - minlen + offset must not wrap and must fit in the BAR length.
 * A further diagnostic covers failure of the mapping call itself.
 *
 * NOTE(review): only the first two parameters are visible in this
 * listing; the local declarations and return statements are absent. The
 * names start, minlen, align, offset, length, bar and p are taken from
 * the statements below.
 * NOTE(review): the BAR range check uses minlen + offset while the
 * mapping call uses length; whether length is clamped in between is not
 * visible here.
 */
67 static void __iomem *map_capability(struct pci_dev *dev, int off,
77 pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
80 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
82 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
85 if (length <= start) {
87 "virtio_pci: bad capability len %u (>%u expected)\n",
92 if (length - start < minlen) {
94 "virtio_pci: bad capability len %u (>=%zu expected)\n",
101 if (start + offset < offset) {
103 "virtio_pci: map wrap-around %u+%u\n",
110 if (offset & (align - 1)) {
112 "virtio_pci: offset %u not aligned to %u\n",
123 if (minlen + offset < minlen ||
124 minlen + offset > pci_resource_len(dev, bar)) {
126 "virtio_pci: map virtio %zu@%u "
127 "out of range on bar %i length %lu\n",
129 bar, (unsigned long)pci_resource_len(dev, bar));
133 p = pci_iomap_range(dev, bar, offset, length);
136 "virtio_pci: unable to map virtio %u@%u on bar %i\n",
137 length, offset, bar);
141 /* virtio config->get_features() implementation */
/*
 * Collect the device feature bits in two 32-bit halves: write 0 to
 * device_feature_select and read device_feature for bits 0-31, then
 * write 1 and OR the second read in as bits 32-63.
 * NOTE(review): the declaration of features and the return statement are
 * not part of this listing.
 */
142 static u64 vp_get_features(struct virtio_device *vdev)
144 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
147 vp_iowrite32(0, &vp_dev->common->device_feature_select);
148 features = vp_ioread32(&vp_dev->common->device_feature);
149 vp_iowrite32(1, &vp_dev->common->device_feature_select);
150 features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
155 /* virtio config->finalize_features() implementation */
/*
 * Write the driver feature set (vdev->features) to the device.
 *
 * After vring_transport_features() has run, VIRTIO_F_VERSION_1 is tested
 * and an error is logged when it is not set. The features are then
 * written in two 32-bit halves, selecting word 0 and word 1 through
 * guest_feature_select before each guest_feature write.
 * NOTE(review): the return statements are not part of this listing, so
 * the error code used for the missing-VERSION_1 case cannot be confirmed
 * here.
 */
156 static int vp_finalize_features(struct virtio_device *vdev)
158 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
160 /* Give virtio_ring a chance to accept features. */
161 vring_transport_features(vdev);
163 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
164 dev_err(&vdev->dev, "virtio: device uses modern interface "
165 "but does not have VIRTIO_F_VERSION_1\n");
169 vp_iowrite32(0, &vp_dev->common->guest_feature_select);
170 vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
171 vp_iowrite32(1, &vp_dev->common->guest_feature_select);
172 vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
177 /* virtio config->get() implementation */
/*
 * Copy @len bytes of device-specific configuration, starting at @offset
 * within vp_dev->device, into @buf.
 *
 * Four access shapes are visible: one 8-bit read, one 16-bit read, one
 * 32-bit read, and a pair of 32-bit reads covering eight bytes (the
 * second at offset + sizeof l). 16- and 32-bit values pass through
 * cpu_to_le16()/cpu_to_le32() before the memcpy() into @buf.
 * NOTE(review): the dispatch that picks among these (presumably a switch
 * on @len) and the local declarations are not part of this listing.
 */
178 static void vp_get(struct virtio_device *vdev, unsigned offset,
179 void *buf, unsigned len)
181 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/*
 * NOTE(review): offset + len is evaluated in unsigned arithmetic and
 * could wrap before the comparison - confirm callers cannot pass such
 * values.
 */
186 BUG_ON(offset + len > vp_dev->device_len);
190 b = ioread8(vp_dev->device + offset);
191 memcpy(buf, &b, sizeof b);
194 w = cpu_to_le16(ioread16(vp_dev->device + offset));
195 memcpy(buf, &w, sizeof w);
198 l = cpu_to_le32(ioread32(vp_dev->device + offset));
199 memcpy(buf, &l, sizeof l);
202 l = cpu_to_le32(ioread32(vp_dev->device + offset));
203 memcpy(buf, &l, sizeof l);
204 l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
205 memcpy(buf + sizeof l, &l, sizeof l);
/*
 * Write @len bytes from @buf into the device-specific configuration at
 * @offset within vp_dev->device.
 *
 * Four access shapes are visible: one 8-bit write, one 16-bit write, one
 * 32-bit write, and a pair of 32-bit writes covering eight bytes (the
 * second at offset + sizeof l). 16- and 32-bit values are copied out of
 * @buf with memcpy() and passed through le16_to_cpu()/le32_to_cpu()
 * before the iowrite call.
 * NOTE(review): the dispatch that picks among these (presumably a switch
 * on @len) and the local declarations are not part of this listing.
 * NOTE(review): the BUG_ON bound adds offset + len in unsigned
 * arithmetic, which could wrap before the comparison - confirm callers
 * cannot pass such values.
 * NOTE(review): the comment opened on the next line has no terminator in
 * this listing.
 */
212 /* the config->set() implementation. it's symmetric to the config->get()
214 static void vp_set(struct virtio_device *vdev, unsigned offset,
215 const void *buf, unsigned len)
217 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
222 BUG_ON(offset + len > vp_dev->device_len);
226 memcpy(&b, buf, sizeof b);
227 iowrite8(b, vp_dev->device + offset);
230 memcpy(&w, buf, sizeof w);
231 iowrite16(le16_to_cpu(w), vp_dev->device + offset);
234 memcpy(&l, buf, sizeof l);
235 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
238 memcpy(&l, buf, sizeof l);
239 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
240 memcpy(&l, buf + sizeof l, sizeof l);
241 iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
/*
 * Return the config_generation field of the common configuration
 * structure, read as a single byte and widened to the u32 return type.
 */
248 static u32 vp_generation(struct virtio_device *vdev)
250 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
251 return vp_ioread8(&vp_dev->common->config_generation);
254 /* config->{get,set}_status() implementations */
/*
 * Return the current value of the 8-bit device_status field of the
 * common configuration structure.
 */
255 static u8 vp_get_status(struct virtio_device *vdev)
257 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
258 return vp_ioread8(&vp_dev->common->device_status);
/*
 * Write @status to the 8-bit device_status field.
 * NOTE(review): the existing comment says status must never be 0, but the
 * statement enforcing that is not part of this listing - confirm it is
 * present in the full file.
 */
261 static void vp_set_status(struct virtio_device *vdev, u8 status)
263 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
264 /* We should never be setting status to 0. */
266 vp_iowrite8(status, &vp_dev->common->device_status);
/*
 * Reset the device: write 0 to device_status, read the field back so the
 * write is flushed, then call vp_synchronize_vectors().
 * NOTE(review): the read-back is a single read whose value is discarded.
 * The virtio 1.0 specification asks the driver to wait until
 * device_status reads back as 0 after a reset; consider whether polling
 * is needed here - confirm against the spec and target devices.
 */
269 static void vp_reset(struct virtio_device *vdev)
271 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
272 /* 0 status means a reset. */
273 vp_iowrite8(0, &vp_dev->common->device_status);
274 /* Flush out the status write, and flush in device writes,
275 * including MSI-X interrupts, if any. */
276 vp_ioread8(&vp_dev->common->device_status);
277 /* Flush pending VQ/configuration callbacks. */
278 vp_synchronize_vectors(vdev);
/*
 * Program @vector into msix_config as the vector for configuration
 * events and return the value read back from the same field. Per the
 * comments below, the read-back both verifies the assignment and flushes
 * the write.
 */
281 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
283 /* Setup the vector used for configuration events */
284 vp_iowrite16(vector, &vp_dev->common->msix_config);
285 /* Verify we had enough resources to assign the vector */
286 /* Will also flush the write out to device */
287 return vp_ioread16(&vp_dev->common->msix_config);
/*
 * setup_vq - create the vring for one queue and program it into the
 * common configuration structure.
 *
 * Steps visible in this listing:
 *   1. Reject an index at or beyond num_queues with -ENOENT.
 *   2. Select the queue and read queue_size. A zero size or an already
 *      enabled queue yields -ENOENT; a size that is not a power of two
 *      yields -EINVAL.
 *   3. Read queue_notify_off and record msix_vec in @info.
 *   4. Create the ring with vring_create_virtqueue(); a -ENOMEM return
 *      follows (its condition is not visible).
 *   5. Write the ring size and the descriptor/avail/used addresses.
 *   6. Point vq->priv at the notification address: inside the premapped
 *      notify_base window when there is one, otherwise through a
 *      dedicated two-byte map_capability() mapping.
 *   7. When msix_vec is not VIRTIO_MSI_NO_VECTOR, write it to
 *      queue_msix_vector and read it back; a read-back of
 *      VIRTIO_MSI_NO_VECTOR jumps to err_assign_vector.
 *
 * The cleanup tail unmaps vq->priv only when no notify_base window
 * exists, then deletes the virtqueue.
 *
 * NOTE(review): several parameters, the local declarations, the error
 * labels and the final returns are not part of this listing.
 */
290 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
291 struct virtio_pci_vq_info *info,
293 void (*callback)(struct virtqueue *vq),
297 struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
298 struct virtqueue *vq;
302 if (index >= vp_ioread16(&cfg->num_queues))
303 return ERR_PTR(-ENOENT);
305 /* Select the queue we're interested in */
306 vp_iowrite16(index, &cfg->queue_select);
308 /* Check if queue is either not available or already active. */
309 num = vp_ioread16(&cfg->queue_size);
310 if (!num || vp_ioread16(&cfg->queue_enable))
311 return ERR_PTR(-ENOENT);
313 if (num & (num - 1)) {
/*
 * NOTE(review): this format string has no trailing newline, unlike the
 * diagnostic strings used elsewhere in this file.
 */
314 dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
315 return ERR_PTR(-EINVAL);
318 /* get offset of notification word for this vq */
319 off = vp_ioread16(&cfg->queue_notify_off);
321 info->msix_vector = msix_vec;
323 /* create the vring */
324 vq = vring_create_virtqueue(index, num,
325 SMP_CACHE_BYTES, &vp_dev->vdev,
326 true, true, vp_notify, callback, name);
328 return ERR_PTR(-ENOMEM);
330 /* activate the queue */
331 vp_iowrite16(virtqueue_get_vring_size(vq), &cfg->queue_size);
332 vp_iowrite64_twopart(virtqueue_get_desc_addr(vq),
333 &cfg->queue_desc_lo, &cfg->queue_desc_hi);
334 vp_iowrite64_twopart(virtqueue_get_avail_addr(vq),
335 &cfg->queue_avail_lo, &cfg->queue_avail_hi);
336 vp_iowrite64_twopart(virtqueue_get_used_addr(vq),
337 &cfg->queue_used_lo, &cfg->queue_used_hi);
339 if (vp_dev->notify_base) {
340 /* offset should not wrap */
341 if ((u64)off * vp_dev->notify_offset_multiplier + 2
342 > vp_dev->notify_len) {
/* NOTE(review): the message assembled below also lacks a trailing newline. */
343 dev_warn(&vp_dev->pci_dev->dev,
344 "bad notification offset %u (x %u) "
345 "for queue %u > %zd",
346 off, vp_dev->notify_offset_multiplier,
347 index, vp_dev->notify_len);
351 vq->priv = (void __force *)vp_dev->notify_base +
352 off * vp_dev->notify_offset_multiplier;
/*
 * NOTE(review): the product passed below is not widened to u64 the way
 * the range check in the other branch is. If off and the multiplier are
 * 32 bits or narrower the product could wrap before map_capability()
 * validates it - confirm the operand types.
 */
354 vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
355 vp_dev->notify_map_cap, 2, 2,
356 off * vp_dev->notify_offset_multiplier, 2,
365 if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
366 vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
367 msix_vec = vp_ioread16(&cfg->queue_msix_vector);
368 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
370 goto err_assign_vector;
377 if (!vp_dev->notify_base)
378 pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
380 vring_del_virtqueue(vq);
/*
 * find_vqs implementation for the modern transport: delegate queue
 * discovery to vp_find_vqs(), then walk vdev->vqs and, for each queue,
 * select it through queue_select and write 1 to queue_enable.
 * NOTE(review): the handling of rc and the return statement are not part
 * of this listing, and the comment opened inside the body has no
 * terminator here.
 */
384 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
385 struct virtqueue *vqs[],
386 vq_callback_t *callbacks[],
387 const char * const names[])
389 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
390 struct virtqueue *vq;
391 int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);
396 /* Select and activate all queues. Has to be done last: once we do
397 * this, there's no way to go back except reset.
399 list_for_each_entry(vq, &vdev->vqs, list) {
400 vp_iowrite16(vq->index, &vp_dev->common->queue_select);
401 vp_iowrite16(1, &vp_dev->common->queue_enable);
/*
 * Tear down one virtqueue: select it, detach its MSI-X vector when MSI-X
 * is enabled (write VIRTIO_MSI_NO_VECTOR, then read back to flush), unmap
 * the mapping held in vq->priv when no shared notify_base window exists,
 * and delete the vring.
 */
407 static void del_vq(struct virtio_pci_vq_info *info)
409 struct virtqueue *vq = info->vq;
410 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
412 vp_iowrite16(vq->index, &vp_dev->common->queue_select);
414 if (vp_dev->msix_enabled) {
415 vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
416 &vp_dev->common->queue_msix_vector);
417 /* Flush the write out to device */
418 vp_ioread16(&vp_dev->common->queue_msix_vector);
421 if (!vp_dev->notify_base)
422 pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
424 vring_del_virtqueue(vq);
/*
 * Config ops table installed by virtio_pci_modern_probe() as one of its
 * two alternatives; going by the name, presumably the one used when no
 * device-specific configuration capability exists - confirm.
 * NOTE(review): some initializers of this table are not part of this
 * listing.
 */
427 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
430 .generation = vp_generation,
431 .get_status = vp_get_status,
432 .set_status = vp_set_status,
434 .find_vqs = vp_modern_find_vqs,
435 .del_vqs = vp_del_vqs,
436 .get_features = vp_get_features,
437 .finalize_features = vp_finalize_features,
438 .bus_name = vp_bus_name,
439 .set_vq_affinity = vp_set_vq_affinity,
/*
 * Config ops table installed by virtio_pci_modern_probe() as one of its
 * two alternatives; presumably the one used when a device-specific
 * configuration region was mapped - confirm.
 * NOTE(review): some initializers of this table are not part of this
 * listing; the visible entries are identical to those of
 * virtio_pci_config_nodev_ops.
 */
442 static const struct virtio_config_ops virtio_pci_config_ops = {
445 .generation = vp_generation,
446 .get_status = vp_get_status,
447 .set_status = vp_set_status,
449 .find_vqs = vp_modern_find_vqs,
450 .del_vqs = vp_del_vqs,
451 .get_features = vp_get_features,
452 .finalize_features = vp_finalize_features,
453 .bus_name = vp_bus_name,
454 .set_vq_affinity = vp_set_vq_affinity,
458 * virtio_pci_find_capability - walk capabilities to find device info.
459 * @dev: the pci device
460 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
461 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
463 * Returns offset of the capability, or 0.
/*
 * Iterates over the PCI_CAP_ID_VNDR capabilities of @dev, reading two
 * single-byte fields of struct virtio_pci_cap from each. A capability
 * whose type equals @cfg_type is considered further only if its BAR has
 * a non-zero resource length and resource flags overlapping
 * @ioresource_types.
 * NOTE(review): @bars is not described in the kernel-doc block preceding
 * this function, and the statements that use it are not part of this
 * listing. Callers pass &vp_dev->modern_bars, which is later handed to
 * pci_request_selected_regions(), so it presumably accumulates a BAR
 * bitmask - confirm.
 */
465 static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
466 u32 ioresource_types, int *bars)
470 for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
472 pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
474 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
477 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
481 /* Ignore structures with reserved BAR values */
485 if (type == cfg_type) {
486 if (pci_resource_len(dev, bar) &&
487 pci_resource_flags(dev, bar) & ioresource_types) {
496 /* This is part of the ABI. Don't screw with it. */
/*
 * Compile-time layout guard. Each BUILD_BUG_ON() below fails the build
 * if a VIRTIO_PCI_* offset constant stops matching offsetof() for the
 * corresponding member of struct virtio_pci_cap,
 * struct virtio_pci_notify_cap or struct virtio_pci_common_cfg.
 * Takes no arguments and has no visible run-time statements.
 */
497 static inline void check_offsets(void)
499 /* Note: disk space was harmed in compilation of this function. */
500 BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
501 offsetof(struct virtio_pci_cap, cap_vndr));
502 BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
503 offsetof(struct virtio_pci_cap, cap_next));
504 BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
505 offsetof(struct virtio_pci_cap, cap_len));
506 BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
507 offsetof(struct virtio_pci_cap, cfg_type));
508 BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
509 offsetof(struct virtio_pci_cap, bar));
510 BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
511 offsetof(struct virtio_pci_cap, offset));
512 BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
513 offsetof(struct virtio_pci_cap, length));
514 BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
515 offsetof(struct virtio_pci_notify_cap,
516 notify_off_multiplier));
517 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
518 offsetof(struct virtio_pci_common_cfg,
519 device_feature_select));
520 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
521 offsetof(struct virtio_pci_common_cfg, device_feature));
522 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
523 offsetof(struct virtio_pci_common_cfg,
524 guest_feature_select));
525 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
526 offsetof(struct virtio_pci_common_cfg, guest_feature));
527 BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
528 offsetof(struct virtio_pci_common_cfg, msix_config));
529 BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
530 offsetof(struct virtio_pci_common_cfg, num_queues));
531 BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
532 offsetof(struct virtio_pci_common_cfg, device_status));
533 BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
534 offsetof(struct virtio_pci_common_cfg, config_generation));
535 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
536 offsetof(struct virtio_pci_common_cfg, queue_select));
537 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
538 offsetof(struct virtio_pci_common_cfg, queue_size));
539 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
540 offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
541 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
542 offsetof(struct virtio_pci_common_cfg, queue_enable));
543 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
544 offsetof(struct virtio_pci_common_cfg, queue_notify_off));
545 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
546 offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
547 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
548 offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
549 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
550 offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
551 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
552 offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
553 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
554 offsetof(struct virtio_pci_common_cfg, queue_used_lo));
555 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
556 offsetof(struct virtio_pci_common_cfg, queue_used_hi));
559 /* the PCI probing function */
/*
 * virtio_pci_modern_probe - set up @vp_dev for the virtio 1.0 interface.
 *
 * Visible flow:
 *   - Filter on PCI device id (0x1000 to 0x107f) and derive the virtio
 *     device id: the subsystem device id for ids below 0x1040, otherwise
 *     the PCI device id minus 0x1040. The virtio vendor id is the PCI
 *     subsystem vendor.
 *   - Locate the common, ISR and notify capabilities. A dev_info about
 *     leaving the device to the legacy driver follows the common lookup;
 *     a missing ISR or notify capability is reported with dev_err.
 *   - Try a 64-bit DMA mask, then a second dma_set_mask_and_coherent()
 *     call, and only warn if DMA setup still fails.
 *   - Look up the device-specific capability, request the BARs collected
 *     in modern_bars, then map the common, ISR, notify and device
 *     regions with map_capability().
 *   - The notify window is mapped up front only when its length plus its
 *     offset within a page fits in PAGE_SIZE; otherwise the capability
 *     offset is saved in notify_map_cap, which setup_vq() uses for
 *     per-queue mappings.
 *   - Install a config ops table and the config_vector, setup_vq and
 *     del_vq hooks.
 *   - The error tail unmaps notify_base (when set), isr and common.
 *
 * NOTE(review): conditions, labels and return values on several paths
 * are not part of this listing, and some comments inside the body have
 * no terminator here.
 */
560 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
562 struct pci_dev *pci_dev = vp_dev->pci_dev;
563 int err, common, isr, notify, device;
569 /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
570 if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
573 if (pci_dev->device < 0x1040) {
574 /* Transitional devices: use the PCI subsystem device id as
575 * virtio device id, same as legacy driver always did.
577 vp_dev->vdev.id.device = pci_dev->subsystem_device;
579 /* Modern devices: simply use PCI device id, but start from 0x1040. */
580 vp_dev->vdev.id.device = pci_dev->device - 0x1040;
582 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
584 /* check for a common config: if not, use legacy mode (bar 0). */
585 common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
586 IORESOURCE_IO | IORESOURCE_MEM,
587 &vp_dev->modern_bars);
589 dev_info(&pci_dev->dev,
590 "virtio_pci: leaving for legacy driver\n");
594 /* If common is there, these should be too... */
595 isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
596 IORESOURCE_IO | IORESOURCE_MEM,
597 &vp_dev->modern_bars);
598 notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
599 IORESOURCE_IO | IORESOURCE_MEM,
600 &vp_dev->modern_bars);
601 if (!isr || !notify) {
602 dev_err(&pci_dev->dev,
603 "virtio_pci: missing capabilities %i/%i/%i\n",
604 common, isr, notify);
608 err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
610 err = dma_set_mask_and_coherent(&pci_dev->dev,
613 dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
615 /* Device capability is only mandatory for devices that have
616 * device-specific configuration.
618 device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
619 IORESOURCE_IO | IORESOURCE_MEM,
620 &vp_dev->modern_bars);
622 err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
623 "virtio-pci-modern");
628 vp_dev->common = map_capability(pci_dev, common,
629 sizeof(struct virtio_pci_common_cfg), 4,
630 0, sizeof(struct virtio_pci_common_cfg),
634 vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
640 /* Read notify_off_multiplier from config space. */
641 pci_read_config_dword(pci_dev,
642 notify + offsetof(struct virtio_pci_notify_cap,
643 notify_off_multiplier),
644 &vp_dev->notify_offset_multiplier);
645 /* Read notify length and offset from config space. */
646 pci_read_config_dword(pci_dev,
647 notify + offsetof(struct virtio_pci_notify_cap,
651 pci_read_config_dword(pci_dev,
652 notify + offsetof(struct virtio_pci_notify_cap,
656 /* We don't know how many VQs we'll map, ahead of the time.
657 * If notify length is small, map it all now.
658 * Otherwise, map each VQ individually later.
660 if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
661 vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
663 &vp_dev->notify_len);
664 if (!vp_dev->notify_base)
667 vp_dev->notify_map_cap = notify;
670 /* Again, we don't know how much we should map, but PAGE_SIZE
671 * is more than enough for all existing devices.
674 vp_dev->device = map_capability(pci_dev, device, 0, 4,
676 &vp_dev->device_len);
680 vp_dev->vdev.config = &virtio_pci_config_ops;
682 vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
685 vp_dev->config_vector = vp_config_vector;
686 vp_dev->setup_vq = setup_vq;
687 vp_dev->del_vq = del_vq;
692 if (vp_dev->notify_base)
693 pci_iounmap(pci_dev, vp_dev->notify_base);
695 pci_iounmap(pci_dev, vp_dev->isr);
697 pci_iounmap(pci_dev, vp_dev->common);
/*
 * Release what the modern probe acquired: unmap the device, notify (when
 * notify_base is set), isr and common regions, then release the BARs
 * recorded in modern_bars.
 * NOTE(review): the embedded line numbers jump between the declaration
 * and the first pci_iounmap(), so at least one statement (possibly a
 * guard on vp_dev->device) is absent from this listing - confirm.
 */
702 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
704 struct pci_dev *pci_dev = vp_dev->pci_dev;
707 pci_iounmap(pci_dev, vp_dev->device);
708 if (vp_dev->notify_base)
709 pci_iounmap(pci_dev, vp_dev->notify_base);
710 pci_iounmap(pci_dev, vp_dev->isr);
711 pci_iounmap(pci_dev, vp_dev->common);
712 pci_release_selected_regions(pci_dev, vp_dev->modern_bars);