diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-12 09:31:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-12 09:31:52 -0700 |
commit | 16ffc3eeaa00d513b0076b7b2b96419f28acc912 (patch) | |
tree | 82503d25fb9091fc6628961d953ffc0d305adf27 /drivers/virtio | |
parent | c34752bc8b3196aee3115d1aa41972604ab0aea8 (diff) | |
parent | e3353853730eb99c56b7b0aed1667d51c0e3699a (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-virtio
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-virtio:
virtio: enhance id_matching for virtio drivers
virtio: fix id_matching for virtio drivers
virtio: handle short buffers in virtio_rng.
virtio_blk: add missing __dev{init,exit} markings
virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC)
virtio: teach virtio_has_feature() about transport features
virtio: expose features in sysfs
virtio_pci: optional MSI-X support
virtio_pci: split up vp_interrupt
virtio: find_vqs/del_vqs virtio operations
virtio: add names to virtqueue struct, mapping from devices to queues.
virtio: meet virtio spec by finalizing features before using device
virtio: fix obsolete documentation on probe function
Diffstat (limited to 'drivers/virtio')
-rw-r--r-- | drivers/virtio/virtio.c | 29 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 27 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 307 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 102 |
4 files changed, 396 insertions, 69 deletions
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 018c070a357..3a43ebf83a4 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -31,21 +31,37 @@ static ssize_t modalias_show(struct device *_d, return sprintf(buf, "virtio:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } +static ssize_t features_show(struct device *_d, + struct device_attribute *attr, char *buf) +{ + struct virtio_device *dev = container_of(_d, struct virtio_device, dev); + unsigned int i; + ssize_t len = 0; + + /* We actually represent this as a bitstring, as it could be + * arbitrary length in future. */ + for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++) + len += sprintf(buf+len, "%c", + test_bit(i, dev->features) ? '1' : '0'); + len += sprintf(buf+len, "\n"); + return len; +} static struct device_attribute virtio_dev_attrs[] = { __ATTR_RO(device), __ATTR_RO(vendor), __ATTR_RO(status), __ATTR_RO(modalias), + __ATTR_RO(features), __ATTR_NULL }; static inline int virtio_id_match(const struct virtio_device *dev, const struct virtio_device_id *id) { - if (id->device != dev->id.device) + if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID) return 0; - return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor != dev->id.vendor; + return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor; } /* This looks through all the IDs a driver claims to support. If any of them @@ -118,13 +134,14 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1 << i)) set_bit(i, dev->features); + dev->config->finalize_features(dev); + err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); - else { - dev->config->finalize_features(dev); + else add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); - } + return err; } @@ -185,6 +202,8 @@ int register_virtio_device(struct virtio_device *dev) /* Acknowledge that we've seen the device. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + INIT_LIST_HEAD(&dev->vqs); + /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9c76a061a04..26b27826479 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -204,6 +204,9 @@ static int balloon(void *_vballoon) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; + struct virtqueue *vqs[2]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack }; + const char *names[] = { "inflate", "deflate" }; int err; vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); @@ -218,22 +221,17 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; /* We expect two virtqueues. */ - vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); - if (IS_ERR(vb->inflate_vq)) { - err = PTR_ERR(vb->inflate_vq); + err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names); + if (err) goto out_free_vb; - } - vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); - if (IS_ERR(vb->deflate_vq)) { - err = PTR_ERR(vb->deflate_vq); - goto out_del_inflate_vq; - } + vb->inflate_vq = vqs[0]; + vb->deflate_vq = vqs[1]; vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { err = PTR_ERR(vb->thread); - goto out_del_deflate_vq; + goto out_del_vqs; } vb->tell_host_first @@ -241,10 +239,8 @@ static int virtballoon_probe(struct virtio_device *vdev) return 0; -out_del_deflate_vq: - vdev->config->del_vq(vb->deflate_vq); -out_del_inflate_vq: - vdev->config->del_vq(vb->inflate_vq); +out_del_vqs: + vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); out: @@ -264,8 +260,7 @@ static void virtballoon_remove(struct virtio_device *vdev) /* Now we reset the device so we can clean up the queues. */ vdev->config->reset(vdev); - vdev->config->del_vq(vb->deflate_vq); - vdev->config->del_vq(vb->inflate_vq); + vdev->config->del_vqs(vdev); kfree(vb); } diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 330aacbdec1..193c8f0e5cc 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -42,6 +42,26 @@ struct virtio_pci_device /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + /* Name strings for interrupts. This size should be enough, + * and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated */ + unsigned msix_used_vectors; +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, }; struct virtio_pci_vq_info @@ -60,6 +80,9 @@ struct virtio_pci_vq_info /* the list node for the virtqueues list */ struct list_head node; + + /* MSI-X vector (or none) */ + unsigned vector; }; /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ @@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; u8 *ptr = buf; int i; @@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + void __iomem *ioaddr = vp_dev->ioaddr + + VIRTIO_PCI_CONFIG(vp_dev) + offset; const u8 *ptr = buf; int i; @@ -164,6 +189,37 @@ static void vp_notify(struct virtqueue *vq) iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); } +/* Handle a configuration change: Tell driver if it wants to know. */ +static irqreturn_t vp_config_changed(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_driver *drv; + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv && drv->config_changed) + drv->config_changed(&vp_dev->vdev); + return IRQ_HANDLED; +} + +/* Notify all virtqueues on an interrupt. */ +static irqreturn_t vp_vring_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock_irqrestore(&vp_dev->lock, flags); + + return ret; +} + /* A small wrapper to also acknowledge the interrupt when it's handled. * I really need an EIO hook for the vring so I can ack the interrupt once we * know that we'll be handling the IRQ but before we invoke the callback since @@ -173,9 +229,6 @@ static void vp_notify(struct virtqueue *vq) static irqreturn_t vp_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; - irqreturn_t ret = IRQ_NONE; - unsigned long flags; u8 isr; /* reading the ISR has the effect of also clearing it so it's very @@ -187,34 +240,137 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return IRQ_NONE; /* Configuration change? Tell driver if it wants to know. */ - if (isr & VIRTIO_PCI_ISR_CONFIG) { - struct virtio_driver *drv; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); + if (isr & VIRTIO_PCI_ISR_CONFIG) + vp_config_changed(irq, opaque); - if (drv && drv->config_changed) - drv->config_changed(&vp_dev->vdev); + return vp_vring_interrupt(irq, opaque); +} + +static void vp_free_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; } - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) - ret = IRQ_HANDLED; + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(vp_dev->msix_entries[i].vector, vp_dev); + vp_dev->msix_used_vectors = 0; + + if (vp_dev->msix_enabled) { + /* Disable the vector used for configuration */ + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Flush the write out to device */ + ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + + vp_dev->msix_enabled = 0; + pci_disable_msix(vp_dev->pci_dev); } - spin_unlock_irqrestore(&vp_dev->lock, flags); +} - return ret; +static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, + int *options, int noptions) +{ + int i; + for (i = 0; i < noptions; ++i) + if (!pci_enable_msix(dev, entries, options[i])) + return options[i]; + return -EBUSY; +} + +static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; + /* We want at most one vector per queue and one for config changes. + * Fallback to separate vectors for config and a shared for queues. + * Finally fall back to regular interrupts. */ + int options[] = { max_vqs + 1, 2 }; + int nvectors = max(options[0], options[1]); + + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, + GFP_KERNEL); + if (!vp_dev->msix_entries) + goto error_entries; + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error_names; + + for (i = 0; i < nvectors; ++i) + vp_dev->msix_entries[i].entry = i; + + err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, + options, ARRAY_SIZE(options)); + if (err < 0) { + /* Can't allocate enough MSI-X vectors, use regular interrupt */ + vp_dev->msix_vectors = 0; + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, name, vp_dev); + if (err) + goto error_irq; + vp_dev->intx_enabled = 1; + } else { + vp_dev->msix_vectors = err; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error_irq; + ++vp_dev->msix_used_vectors; + + iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error_irq; + } + } + + if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error_irq; + ++vp_dev->msix_used_vectors; + } + return 0; +error_irq: + vp_free_vectors(vdev); + kfree(vp_dev->msix_names); +error_names: + kfree(vp_dev->msix_entries); +error_entries: + return err; } -/* the config->find_vq() implementation */ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq)) + void (*callback)(struct virtqueue *vq), + const char *name) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; unsigned long flags, size; - u16 num; + u16 num, vector; int err; /* Select the queue we're interested in */ @@ -233,6 +389,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; + info->vector = VIRTIO_MSI_NO_VECTOR; size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); @@ -247,7 +404,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, vp_notify, callback); + vdev, info->queue, vp_notify, callback, name); if (!vq) { err = -ENOMEM; goto out_activate_queue; @@ -256,12 +413,43 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, vq->priv = info; info->vq = vq; + /* allocate per-vq vector if available and necessary */ + if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) { + vector = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(vp_dev->msix_entries[vector].vector, + vring_interrupt, 0, + vp_dev->msix_names[vector], vq); + if (err) + goto out_request_irq; + info->vector = vector; + ++vp_dev->msix_used_vectors; + } else + vector = VP_MSIX_VQ_VECTOR; + + if (callback && vp_dev->msix_enabled) { + iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + if (vector == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_assign; + } + } + spin_lock_irqsave(&vp_dev->lock, flags); list_add(&info->node, &vp_dev->virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); return vq; +out_assign: + if (info->vector != VIRTIO_MSI_NO_VECTOR) { + free_irq(vp_dev->msix_entries[info->vector].vector, vq); + --vp_dev->msix_used_vectors; + } +out_request_irq: + vring_del_virtqueue(vq); out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); free_pages_exact(info->queue, size); @@ -270,21 +458,27 @@ out_info: return ERR_PTR(err); } -/* the config->del_vq() implementation */ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long flags, size; + unsigned long size; - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + + if (info->vector != VIRTIO_MSI_NO_VECTOR) + free_irq(vp_dev->msix_entries[info->vector].vector, vq); + + if (vp_dev->msix_enabled) { + iowrite16(VIRTIO_MSI_NO_VECTOR, + vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + } vring_del_virtqueue(vq); /* Select and deactivate the queue */ - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); @@ -292,14 +486,57 @@ static void vp_del_vq(struct virtqueue *vq) kfree(info); } +/* the config->del_vqs() implementation */ +static void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vp_del_vq(vq); + + vp_free_vectors(vdev); +} + +/* the config->find_vqs() implementation */ +static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int vectors = 0; + int i, err; + + /* How many vectors would we like? */ + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++vectors; + + err = vp_request_vectors(vdev, vectors); + if (err) + goto error_request; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) + goto error_find; + } + return 0; + +error_find: + vp_del_vqs(vdev); + +error_request: + return PTR_ERR(vqs[i]); +} + static struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, .get_status = vp_get_status, .set_status = vp_set_status, .reset = vp_reset, - .find_vq = vp_find_vq, - .del_vq = vp_del_vq, + .find_vqs = vp_find_vqs, + .del_vqs = vp_del_vqs, .get_features = vp_get_features, .finalize_features = vp_finalize_features, }; @@ -310,7 +547,7 @@ static void virtio_pci_release_dev(struct device *_d) struct virtio_pci_device *vp_dev = to_vp_device(dev); struct pci_dev *pci_dev = vp_dev->pci_dev; - free_irq(pci_dev->irq, vp_dev); + vp_del_vqs(dev); pci_set_drvdata(pci_dev, NULL); pci_iounmap(pci_dev, vp_dev->ioaddr); pci_release_regions(pci_dev); @@ -369,21 +606,13 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; vp_dev->vdev.id.device = pci_dev->subsystem_device; - /* register a handler for the queue with the PCI device's interrupt */ - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, - dev_name(&vp_dev->vdev.dev), vp_dev); - if (err) - goto out_set_drvdata; - /* finally register the virtio device */ err = register_virtio_device(&vp_dev->vdev); if (err) - goto out_req_irq; + goto out_set_drvdata; return 0; -out_req_irq: - free_irq(pci_dev->irq, vp_dev); out_set_drvdata: pci_set_drvdata(pci_dev, NULL); pci_iounmap(pci_dev, vp_dev->ioaddr); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5c52369ab9b..a882f260651 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -23,21 +23,30 @@ #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ -#define BAD_RING(_vq, fmt...) \ - do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0) +#define BAD_RING(_vq, fmt, args...) \ + do { \ + dev_err(&(_vq)->vq.vdev->dev, \ + "%s:"fmt, (_vq)->vq.name, ##args); \ + BUG(); \ + } while (0) /* Caller is supposed to guarantee no reentry. */ #define START_USE(_vq) \ do { \ if ((_vq)->in_use) \ - panic("in_use = %i\n", (_vq)->in_use); \ + panic("%s:in_use = %i\n", \ + (_vq)->vq.name, (_vq)->in_use); \ (_vq)->in_use = __LINE__; \ mb(); \ - } while(0) + } while (0) #define END_USE(_vq) \ do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) #else -#define BAD_RING(_vq, fmt...) \ - do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0) +#define BAD_RING(_vq, fmt, args...) \ + do { \ + dev_err(&_vq->vq.vdev->dev, \ + "%s:"fmt, (_vq)->vq.name, ##args); \ + (_vq)->broken = true; \ + } while (0) #define START_USE(vq) #define END_USE(vq) #endif @@ -52,6 +61,9 @@ struct vring_virtqueue /* Other side has made a mess, don't try any more. */ bool broken; + /* Host supports indirect buffers */ + bool indirect; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -76,6 +88,55 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +/* Set up an indirect table of descriptors and add it to the queue. */ +static int vring_add_indirect(struct vring_virtqueue *vq, + struct scatterlist sg[], + unsigned int out, + unsigned int in) +{ + struct vring_desc *desc; + unsigned head; + int i; + + desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC); + if (!desc) + return vq->vring.num; + + /* Transfer entries from the sg list into the indirect page */ + for (i = 0; i < out; i++) { + desc[i].flags = VRING_DESC_F_NEXT; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + sg++; + } + for (; i < (out + in); i++) { + desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + sg++; + } + + /* Last one doesn't continue. */ + desc[i-1].flags &= ~VRING_DESC_F_NEXT; + desc[i-1].next = 0; + + /* We're about to use a buffer */ + vq->num_free--; + + /* Use a single buffer which doesn't continue */ + head = vq->free_head; + vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; + vq->vring.desc[head].addr = virt_to_phys(desc); + vq->vring.desc[head].len = i * sizeof(struct vring_desc); + + /* Update free pointer */ + vq->free_head = vq->vring.desc[head].next; + + return head; +} + static int vring_add_buf(struct virtqueue *_vq, struct scatterlist sg[], unsigned int out, @@ -85,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i, avail, head, uninitialized_var(prev); + START_USE(vq); + BUG_ON(data == NULL); + + /* If the host supports indirect descriptor tables, and we have multiple + * buffers, then go indirect. FIXME: tune this threshold */ + if (vq->indirect && (out + in) > 1 && vq->num_free) { + head = vring_add_indirect(vq, sg, out, in); + if (head != vq->vring.num) + goto add_head; + } + BUG_ON(out + in > vq->vring.num); BUG_ON(out + in == 0); - START_USE(vq); - if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); @@ -127,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq, /* Update free pointer */ vq->free_head = i; +add_head: /* Set token. */ vq->data[head] = data; @@ -170,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) /* Put back on free list: find end */ i = head; + + /* Free the indirect table */ + if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) + kfree(phys_to_virt(vq->vring.desc[i].addr)); + while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { i = vq->vring.desc[i].next; vq->num_free++; @@ -284,7 +360,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *)) + void (*callback)(struct virtqueue *), + const char *name) { struct vring_virtqueue *vq; unsigned int i; @@ -303,14 +380,18 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.vq_ops = &vring_vq_ops; + vq->vq.name = name; vq->notify = notify; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; + list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; #endif + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + /* No callback? Tell other side not to bother us. */ if (!callback) vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; @@ -327,6 +408,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue); void vring_del_virtqueue(struct virtqueue *vq) { + list_del(&vq->list); kfree(to_vvq(vq)); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); @@ -338,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev) for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { switch (i) { + case VIRTIO_RING_F_INDIRECT_DESC: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); |