Commit 93890b71 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (25 commits)
  virtio: balloon driver
  virtio: Use PCI revision field to indicate virtio PCI ABI version
  virtio: PCI device
  virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz
  virtio_blk: Dont waste major numbers
  virtio_blk: provide getgeo
  virtio_net: parametrize the napi_weight for virtio receive queue.
  virtio: free transmit skbs when notified, not on next xmit.
  virtio: flush buffers on open
  virtnet: remove double ether_setup
  virtio: Allow virtio to be modular and used by modules
  virtio: Use the sg_phys convenience function.
  virtio: Put the virtio under the virtualization menu
  virtio: handle interrupts after callbacks turned off
  virtio: reset function
  virtio: populate network rings in the probe routine, not open
  virtio: Tweak virtio_net defines
  virtio: Net header needs hdr_len
  virtio: remove unused id field from struct virtio_blk_outhdr
  virtio: clarify NO_NOTIFY flag usage
  ...
parents f5bb3a5e 6b35e407
This diff is collapsed.
...@@ -53,5 +53,6 @@ config KVM_AMD ...@@ -53,5 +53,6 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under # OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu. # the virtualization menu.
source drivers/lguest/Kconfig source drivers/lguest/Kconfig
source drivers/virtio/Kconfig
endif # VIRTUALIZATION endif # VIRTUALIZATION
...@@ -91,6 +91,4 @@ source "drivers/dca/Kconfig" ...@@ -91,6 +91,4 @@ source "drivers/dca/Kconfig"
source "drivers/auxdisplay/Kconfig" source "drivers/auxdisplay/Kconfig"
source "drivers/uio/Kconfig" source "drivers/uio/Kconfig"
source "drivers/virtio/Kconfig"
endmenu endmenu
...@@ -440,6 +440,7 @@ config VIRTIO_BLK ...@@ -440,6 +440,7 @@ config VIRTIO_BLK
tristate "Virtio block driver (EXPERIMENTAL)" tristate "Virtio block driver (EXPERIMENTAL)"
depends on EXPERIMENTAL && VIRTIO depends on EXPERIMENTAL && VIRTIO
---help--- ---help---
This is the virtual block driver for lguest. Say Y or M. This is the virtual block driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
endif # BLK_DEV endif # BLK_DEV
...@@ -7,8 +7,10 @@ ...@@ -7,8 +7,10 @@
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) #define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS)
#define PART_BITS 4
static int major, index;
static unsigned char virtblk_index = 'a';
struct virtio_blk struct virtio_blk
{ {
spinlock_t lock; spinlock_t lock;
...@@ -36,7 +38,7 @@ struct virtblk_req ...@@ -36,7 +38,7 @@ struct virtblk_req
struct virtio_blk_inhdr in_hdr; struct virtio_blk_inhdr in_hdr;
}; };
static bool blk_done(struct virtqueue *vq) static void blk_done(struct virtqueue *vq)
{ {
struct virtio_blk *vblk = vq->vdev->priv; struct virtio_blk *vblk = vq->vdev->priv;
struct virtblk_req *vbr; struct virtblk_req *vbr;
...@@ -65,7 +67,6 @@ static bool blk_done(struct virtqueue *vq) ...@@ -65,7 +67,6 @@ static bool blk_done(struct virtqueue *vq)
/* In case queue is stopped waiting for more buffers. */ /* In case queue is stopped waiting for more buffers. */
blk_start_queue(vblk->disk->queue); blk_start_queue(vblk->disk->queue);
spin_unlock_irqrestore(&vblk->lock, flags); spin_unlock_irqrestore(&vblk->lock, flags);
return true;
} }
static bool do_req(struct request_queue *q, struct virtio_blk *vblk, static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
...@@ -153,20 +154,37 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp, ...@@ -153,20 +154,37 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp,
(void __user *)data); (void __user *)data);
} }
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
/* some standard values, similar to sd */
geo->heads = 1 << 6;
geo->sectors = 1 << 5;
geo->cylinders = get_capacity(bd->bd_disk) >> 11;
return 0;
}
static struct block_device_operations virtblk_fops = { static struct block_device_operations virtblk_fops = {
.ioctl = virtblk_ioctl, .ioctl = virtblk_ioctl,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.getgeo = virtblk_getgeo,
}; };
/*
 * Map a disk index to its first minor number.  Each disk owns a block of
 * (1 << PART_BITS) consecutive minors.
 */
static int index_to_minor(int index)
{
	const int minors_per_disk = 1 << PART_BITS;

	return index * minors_per_disk;
}
static int virtblk_probe(struct virtio_device *vdev) static int virtblk_probe(struct virtio_device *vdev)
{ {
struct virtio_blk *vblk; struct virtio_blk *vblk;
int err, major; int err;
void *token;
unsigned int len;
u64 cap; u64 cap;
u32 v; u32 v;
if (index_to_minor(index) >= 1 << MINORBITS)
return -ENOSPC;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) { if (!vblk) {
err = -ENOMEM; err = -ENOMEM;
...@@ -178,7 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -178,7 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->vdev = vdev; vblk->vdev = vdev;
/* We expect one virtqueue, for output. */ /* We expect one virtqueue, for output. */
vblk->vq = vdev->config->find_vq(vdev, blk_done); vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
if (IS_ERR(vblk->vq)) { if (IS_ERR(vblk->vq)) {
err = PTR_ERR(vblk->vq); err = PTR_ERR(vblk->vq);
goto out_free_vblk; goto out_free_vblk;
...@@ -190,17 +208,11 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -190,17 +208,11 @@ static int virtblk_probe(struct virtio_device *vdev)
goto out_free_vq; goto out_free_vq;
} }
major = register_blkdev(0, "virtblk");
if (major < 0) {
err = major;
goto out_mempool;
}
/* FIXME: How many partitions? How long is a piece of string? */ /* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << 4); vblk->disk = alloc_disk(1 << PART_BITS);
if (!vblk->disk) { if (!vblk->disk) {
err = -ENOMEM; err = -ENOMEM;
goto out_unregister_blkdev; goto out_mempool;
} }
vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
...@@ -209,22 +221,32 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -209,22 +221,32 @@ static int virtblk_probe(struct virtio_device *vdev)
goto out_put_disk; goto out_put_disk;
} }
sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++); if (index < 26) {
sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
} else if (index < (26 + 1) * 26) {
sprintf(vblk->disk->disk_name, "vd%c%c",
'a' + index / 26 - 1, 'a' + index % 26);
} else {
const unsigned int m1 = (index / 26 - 1) / 26 - 1;
const unsigned int m2 = (index / 26 - 1) % 26;
const unsigned int m3 = index % 26;
sprintf(vblk->disk->disk_name, "vd%c%c%c",
'a' + m1, 'a' + m2, 'a' + m3);
}
vblk->disk->major = major; vblk->disk->major = major;
vblk->disk->first_minor = 0; vblk->disk->first_minor = index_to_minor(index);
vblk->disk->private_data = vblk; vblk->disk->private_data = vblk;
vblk->disk->fops = &virtblk_fops; vblk->disk->fops = &virtblk_fops;
index++;
/* If barriers are supported, tell block layer that queue is ordered */ /* If barriers are supported, tell block layer that queue is ordered */
token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len); if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER))
if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER))
blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap); /* Host must always specify the capacity. */
if (err) { __virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity),
dev_err(&vdev->dev, "Bad/missing capacity in config\n"); &cap);
goto out_cleanup_queue;
}
/* If capacity is too big, truncate with warning. */ /* If capacity is too big, truncate with warning. */
if ((sector_t)cap != cap) { if ((sector_t)cap != cap) {
...@@ -234,31 +256,25 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -234,31 +256,25 @@ static int virtblk_probe(struct virtio_device *vdev)
} }
set_capacity(vblk->disk, cap); set_capacity(vblk->disk, cap);
err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v); /* Host can optionally specify maximum segment size and number of
* segments. */
err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
offsetof(struct virtio_blk_config, size_max),
&v);
if (!err) if (!err)
blk_queue_max_segment_size(vblk->disk->queue, v); blk_queue_max_segment_size(vblk->disk->queue, v);
else if (err != -ENOENT) {
dev_err(&vdev->dev, "Bad SIZE_MAX in config\n");
goto out_cleanup_queue;
}
err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v); err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
offsetof(struct virtio_blk_config, seg_max),
&v);
if (!err) if (!err)
blk_queue_max_hw_segments(vblk->disk->queue, v); blk_queue_max_hw_segments(vblk->disk->queue, v);
else if (err != -ENOENT) {
dev_err(&vdev->dev, "Bad SEG_MAX in config\n");
goto out_cleanup_queue;
}
add_disk(vblk->disk); add_disk(vblk->disk);
return 0; return 0;
out_cleanup_queue:
blk_cleanup_queue(vblk->disk->queue);
out_put_disk: out_put_disk:
put_disk(vblk->disk); put_disk(vblk->disk);
out_unregister_blkdev:
unregister_blkdev(major, "virtblk");
out_mempool: out_mempool:
mempool_destroy(vblk->pool); mempool_destroy(vblk->pool);
out_free_vq: out_free_vq:
...@@ -274,12 +290,16 @@ static void virtblk_remove(struct virtio_device *vdev) ...@@ -274,12 +290,16 @@ static void virtblk_remove(struct virtio_device *vdev)
struct virtio_blk *vblk = vdev->priv; struct virtio_blk *vblk = vdev->priv;
int major = vblk->disk->major; int major = vblk->disk->major;
/* Nothing should be pending. */
BUG_ON(!list_empty(&vblk->reqs)); BUG_ON(!list_empty(&vblk->reqs));
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
blk_cleanup_queue(vblk->disk->queue); blk_cleanup_queue(vblk->disk->queue);
put_disk(vblk->disk); put_disk(vblk->disk);
unregister_blkdev(major, "virtblk"); unregister_blkdev(major, "virtblk");
mempool_destroy(vblk->pool); mempool_destroy(vblk->pool);
/* There should be nothing in the queue now, so no need to shutdown */
vdev->config->del_vq(vblk->vq); vdev->config->del_vq(vblk->vq);
kfree(vblk); kfree(vblk);
} }
...@@ -299,11 +319,15 @@ static struct virtio_driver virtio_blk = { ...@@ -299,11 +319,15 @@ static struct virtio_driver virtio_blk = {
static int __init init(void) static int __init init(void)
{ {
major = register_blkdev(0, "virtblk");
if (major < 0)
return major;
return register_virtio_driver(&virtio_blk); return register_virtio_driver(&virtio_blk);
} }
static void __exit fini(void) static void __exit fini(void)
{ {
unregister_blkdev(major, "virtblk");
unregister_virtio_driver(&virtio_blk); unregister_virtio_driver(&virtio_blk);
} }
module_init(init); module_init(init);
......
...@@ -158,13 +158,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) ...@@ -158,13 +158,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
/* Find the input queue. */ /* Find the input queue. */
/* FIXME: This is why we want to wean off hvc: we do nothing /* FIXME: This is why we want to wean off hvc: we do nothing
* when input comes in. */ * when input comes in. */
in_vq = vdev->config->find_vq(vdev, NULL); in_vq = vdev->config->find_vq(vdev, 0, NULL);
if (IS_ERR(in_vq)) { if (IS_ERR(in_vq)) {
err = PTR_ERR(in_vq); err = PTR_ERR(in_vq);
goto free; goto free;
} }
out_vq = vdev->config->find_vq(vdev, NULL); out_vq = vdev->config->find_vq(vdev, 1, NULL);
if (IS_ERR(out_vq)) { if (IS_ERR(out_vq)) {
err = PTR_ERR(out_vq); err = PTR_ERR(out_vq);
goto free_in_vq; goto free_in_vq;
......
...@@ -52,57 +52,82 @@ struct lguest_device { ...@@ -52,57 +52,82 @@ struct lguest_device {
/*D:130 /*D:130
* Device configurations * Device configurations
* *
* The configuration information for a device consists of a series of fields. * The configuration information for a device consists of one or more
* We don't really care what they are: the Launcher set them up, and the driver * virtqueues, a feature bitmask, and some configuration bytes. The
* will look at them during setup. * configuration bytes don't really matter to us: the Launcher sets them up, and
* the driver will look at them during setup.
* *
* For us these fields come immediately after that device's descriptor in the * A convenient routine to return the device's virtqueue config array:
* lguest_devices page. * immediately after the descriptor. */
* static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc)
* Each field starts with a "type" byte, a "length" byte, then that number of {
* bytes of configuration information. The device descriptor tells us the return (void *)(desc + 1);
* total configuration length so we know when we've reached the last field. */ }
/* type + length bytes */ /* The features come immediately after the virtqueues. */
#define FHDR_LEN 2 static u8 *lg_features(const struct lguest_device_desc *desc)
{
return (void *)(lg_vq(desc) + desc->num_vq);
}
/* This finds the first field of a given type for a device's configuration. */ /* The config space comes after the two feature bitmasks. */
static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) static u8 *lg_config(const struct lguest_device_desc *desc)
{ {
struct lguest_device_desc *desc = to_lgdev(vdev)->desc; return lg_features(desc) + desc->feature_len * 2;
int i; }
for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) {
if (desc->config[i] == type) {
/* Mark it used, so Host can know we looked at it, and
* also so we won't find the same one twice. */
desc->config[i] |= 0x80;
/* Remember, the second byte is the length. */
*len = desc->config[i+1];
/* We return a pointer to the field header. */
return desc->config + i;
}
}
/* Not found: return NULL for failure. */ /* The total size of the config page used by this device (incl. desc) */
return NULL; static unsigned desc_size(const struct lguest_device_desc *desc)
{
return sizeof(*desc)
+ desc->num_vq * sizeof(struct lguest_vqconfig)
+ desc->feature_len * 2
+ desc->config_len;
}
/*
 * This tests (and acknowledges) a feature bit.
 *
 * @vdev: the virtio device whose lguest descriptor is consulted
 * @fbit: feature bit number to test
 *
 * Returns true if the bit is set in the first half of the feature bitmap, in
 * which case the matching bit in the second half is set as well; returns
 * false otherwise, including when @fbit lies beyond the bitmap.
 */
static bool lg_feature(struct virtio_device *vdev, unsigned fbit)
{
	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
	u8 *features;

	/* Obviously if they ask for a feature off the end of our feature
	 * bitmap, it's not set.  Valid byte indices are 0..feature_len-1:
	 * an index equal to feature_len would already land on the first byte
	 * of the other (acknowledgement) half, so it must be rejected too. */
	if (fbit / 8 >= desc->feature_len)
		return false;

	/* The feature bitmap comes after the virtqueues. */
	features = lg_features(desc);
	if (!(features[fbit / 8] & (1 << (fbit % 8))))
		return false;

	/* We set the matching bit in the other half of the bitmap to tell the
	 * Host we want to use this feature.  We don't use this yet, but we
	 * could in future. */
	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
	return true;
}
/* Once they've found a field, getting a copy of it is easy. */ /* Once they've found a field, getting a copy of it is easy. */
static void lg_get(struct virtio_device *vdev, void *token, static void lg_get(struct virtio_device *vdev, unsigned int offset,
void *buf, unsigned len) void *buf, unsigned len)
{ {
/* Check they didn't ask for more than the length of the field! */ struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
BUG_ON(len > ((u8 *)token)[1]);
memcpy(buf, token + FHDR_LEN, len); /* Check they didn't ask for more than the length of the config! */
BUG_ON(offset + len > desc->config_len);
memcpy(buf, lg_config(desc) + offset, len);
} }
/* Setting the contents is also trivial. */ /* Setting the contents is also trivial. */
static void lg_set(struct virtio_device *vdev, void *token, static void lg_set(struct virtio_device *vdev, unsigned int offset,
const void *buf, unsigned len) const void *buf, unsigned len)
{ {
BUG_ON(len > ((u8 *)token)[1]); struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
memcpy(token + FHDR_LEN, buf, len);
/* Check they didn't ask for more than the length of the config! */
BUG_ON(offset + len > desc->config_len);
memcpy(lg_config(desc) + offset, buf, len);
} }
/* The operations to get and set the status word just access the status field /* The operations to get and set the status word just access the status field
...@@ -114,9 +139,20 @@ static u8 lg_get_status(struct virtio_device *vdev) ...@@ -114,9 +139,20 @@ static u8 lg_get_status(struct virtio_device *vdev)
static void lg_set_status(struct virtio_device *vdev, u8 status) static void lg_set_status(struct virtio_device *vdev, u8 status)
{ {
BUG_ON(!status);
to_lgdev(vdev)->desc->status = status; to_lgdev(vdev)->desc->status = status;
} }
/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
* address of the device. The Host will zero the status and all the
* features. */
static void lg_reset(struct virtio_device *vdev)
{
unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
}
/* /*
* Virtqueues * Virtqueues
* *
...@@ -165,39 +201,29 @@ static void lg_notify(struct virtqueue *vq) ...@@ -165,39 +201,29 @@ static void lg_notify(struct virtqueue *vq)
* *
* So we provide devices with a "find virtqueue and set it up" function. */ * So we provide devices with a "find virtqueue and set it up" function. */
static struct virtqueue *lg_find_vq(struct virtio_device *vdev, static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
bool (*callback)(struct virtqueue *vq)) unsigned index,
void (*callback)(struct virtqueue *vq))
{ {
struct lguest_device *ldev = to_lgdev(vdev);
struct lguest_vq_info *lvq; struct lguest_vq_info *lvq;
struct virtqueue *vq; struct virtqueue *vq;
unsigned int len;
void *token;
int err; int err;
/* Look for a field of the correct type to mark a virtqueue. Note that /* We must have this many virtqueues. */
* if this succeeds, then the type will be changed so it won't be found if (index >= ldev->desc->num_vq)
* again, and future lg_find_vq() calls will find the next
* virtqueue (if any). */
token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len);
if (!token)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
if (!lvq) if (!lvq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* Note: we could use a configuration space inside here, just like we /* Make a copy of the "struct lguest_vqconfig" entry, which sits after
* do for the device. This would allow expansion in future, because * the descriptor. We need a copy because the config space might not
* our configuration system is designed to be expansible. But this is * be aligned correctly. */
* way easier. */ memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config));
if (len != sizeof(lvq->config)) {
dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len);
err = -EIO;
goto free_lvq;
}
/* Make a copy of the "struct lguest_vqconfig" field. We need a copy
* because the config space might not be aligned correctly. */
vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config));
printk("Mapping virtqueue %i addr %lx\n", index,
(unsigned long)lvq->config.pfn << PAGE_SHIFT);
/* Figure out how many pages the ring will take, and map that memory */ /* Figure out how many pages the ring will take, and map that memory */
lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
DIV_ROUND_UP(vring_size(lvq->config.num, DIV_ROUND_UP(vring_size(lvq->config.num,
...@@ -259,11 +285,12 @@ static void lg_del_vq(struct virtqueue *vq) ...@@ -259,11 +285,12 @@ static void lg_del_vq(struct virtqueue *vq)
/* The ops structure which hooks everything together. */ /* The ops structure which hooks everything together. */
static struct virtio_config_ops lguest_config_ops = { static struct virtio_config_ops lguest_config_ops = {
.find = lg_find, .feature = lg_feature,
.get = lg_get, .get = lg_get,
.set = lg_set, .set = lg_set,
.get_status = lg_get_status, .get_status = lg_get_status,
.set_status = lg_set_status, .set_status = lg_set_status,
.reset = lg_reset,
.find_vq = lg_find_vq, .find_vq = lg_find_vq,
.del_vq = lg_del_vq, .del_vq = lg_del_vq,
}; };
...@@ -329,13 +356,14 @@ static void scan_devices(void) ...@@ -329,13 +356,14 @@ static void scan_devices(void)
struct lguest_device_desc *d; struct lguest_device_desc *d;
/* We start at the page beginning, and skip over each entry. */ /* We start at the page beginning, and skip over each entry. */
for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) { for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
d = lguest_devices + i; d = lguest_devices + i;
/* Once we hit a zero, stop. */ /* Once we hit a zero, stop. */
if (d->type == 0) if (d->type == 0)
break; break;
printk("Device at %i has size %u\n", i, desc_size(d));
add_lguest_device(d); add_lguest_device(d);
} }
} }
......
...@@ -3114,6 +3114,7 @@ config VIRTIO_NET ...@@ -3114,6 +3114,7 @@ config VIRTIO_NET
tristate "Virtio network driver (EXPERIMENTAL)" tristate "Virtio network driver (EXPERIMENTAL)"
depends on EXPERIMENTAL && VIRTIO depends on EXPERIMENTAL && VIRTIO
---help--- ---help---
This is the virtual network driver for lguest. Say Y or M. This is the virtual network driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
endif # NETDEVICES endif # NETDEVICES
This diff is collapsed.
# Virtio always gets selected by whoever wants it. # Virtio always gets selected by whoever wants it.
config VIRTIO config VIRTIO
bool tristate
# Similarly the virtio ring implementation. # Similarly the virtio ring implementation.
config VIRTIO_RING config VIRTIO_RING
bool tristate
depends on VIRTIO depends on VIRTIO
config VIRTIO_PCI
tristate "PCI driver for virtio devices (EXPERIMENTAL)"
depends on PCI && EXPERIMENTAL
select VIRTIO
select VIRTIO_RING
---help---
This driver provides support for virtio based paravirtual device
drivers over PCI. This requires that your VMM has appropriate PCI
virtio backends. Most QEMU based VMMs should support these devices
(like KVM or Xen).
Currently, the ABI is not considered stable so there is no guarantee
that this version of the driver will work with your VMM.
If unsure, say M.
config VIRTIO_BALLOON
tristate "Virtio balloon driver (EXPERIMENTAL)"
select VIRTIO
select VIRTIO_RING
---help---
This driver supports increasing and decreasing the amount
of memory within a KVM guest.
If unsure, say M.
obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO) += virtio.o
obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
...@@ -102,9 +102,13 @@ static int virtio_dev_remove(struct device *_d) ...@@ -102,9 +102,13 @@ static int virtio_dev_remove(struct device *_d)
struct virtio_driver *drv = container_of(dev->dev.driver, struct virtio_driver *drv = container_of(dev->dev.driver,
struct virtio_driver, driver); struct virtio_driver, driver);
dev->config->set_status(dev, dev->config->get_status(dev)
& ~VIRTIO_CONFIG_S_DRIVER);
drv->remove(dev); drv->remove(dev);
/* Driver should have reset device. */
BUG_ON(dev->config->get_status(dev));
/* Acknowledge the device's existence again. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
return 0; return 0;
} }
...@@ -130,6 +134,10 @@ int register_virtio_device(struct virtio_device *dev) ...@@ -130,6 +134,10 @@ int register_virtio_device(struct virtio_device *dev)
dev->dev.bus = &virtio_bus; dev->dev.bus = &virtio_bus;
sprintf(dev->dev.bus_id, "%u", dev->index); sprintf(dev->dev.bus_id, "%u", dev->index);
/* We always start by resetting the device, in case a previous
* driver messed it up. This also tests that code path a little. */
dev->config->reset(dev);
/* Acknowledge that we've seen the device. */ /* Acknowledge that we've seen the device. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
...@@ -148,55 +156,18 @@ void unregister_virtio_device(struct virtio_device *dev) ...@@ -148,55 +156,18 @@ void unregister_virtio_device(struct virtio_device *dev)
} }
EXPORT_SYMBOL_GPL(unregister_virtio_device); EXPORT_SYMBOL_GPL(unregister_virtio_device);
int __virtio_config_val(struct virtio_device *vdev,
u8 type, void *val, size_t size)
{
void *token;
unsigned int len;
token = vdev->config->find(vdev, type, &len);
if (!token)
return -ENOENT;
if (len != size)
return -EIO;
vdev->config->get(vdev, token, val, size);
return 0;
}
EXPORT_SYMBOL_GPL(__virtio_config_val);
int virtio_use_bit(struct virtio_device *vdev,
void *token, unsigned int len, unsigned int bitnum)
{
unsigned long bits[16];
/* This makes it convenient to pass-through find() results. */
if (!token)
return 0;
/* bit not in range of this bitfield? */
if (bitnum * 8 >= len / 2)
return 0;
/* Giant feature bitfields are silly. */
BUG_ON(len > sizeof(bits));
vdev->config->get(vdev, token, bits, len);
if (!test_bit(bitnum, bits))
return 0;
/* Set acknowledge bit, and write it back. */
set_bit(bitnum + len * 8 / 2, bits);
vdev->config->set(vdev, token, bits, len);
return 1;
}
EXPORT_SYMBOL_GPL(virtio_use_bit);
static int virtio_init(void) static int virtio_init(void)
{ {
if (bus_register(&virtio_bus) != 0) if (bus_register(&virtio_bus) != 0)
panic("virtio bus registration failed"); panic("virtio bus registration failed");
return 0; return 0;
} }
/* Module exit hook (registered with module_exit): removes the virtio bus that
 * virtio_init() registered with bus_register(). */
static void __exit virtio_exit(void)
{
bus_unregister(&virtio_bus);
}
core_initcall(virtio_init); core_initcall(virtio_init);
module_exit(virtio_exit);
MODULE_LICENSE("GPL");
/* Virtio balloon implementation, inspired by Dor Loar and Marcelo
* Tosatti's implementations.
*
* Copyright 2008 Rusty Russell IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
//#define DEBUG
#include <linux/virtio.h>
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
/* Per-device state of the balloon driver.  One instance is allocated in
 * virtballoon_probe() and stored in vdev->priv. */
struct virtio_balloon
{
/* The virtio device this balloon belongs to. */
struct virtio_device *vdev;
/* Queue used by fill_balloon() to report pages given up, and queue used by
 * leak_balloon() to report pages taken back. */
struct virtqueue *inflate_vq, *deflate_vq;
/* Where the ballooning thread waits for config to change. */
wait_queue_head_t config_change;
/* The thread servicing the balloon. */
struct task_struct *thread;
/* Waiting for host to ack the pages we released. */
struct completion acked;
/* Do we have to tell Host *before* we reuse pages? */
bool tell_host_first;
/* The pages we've told the Host we're not using: num_pages counts them and
 * the list links them through page->lru. */
unsigned int num_pages;
struct list_head pages;
/* The array of pfns we tell the Host about; only the first num_pfns entries
 * are meaningful for the current request. */
unsigned int num_pfns;
u32 pfns[256];
};
/* Match table referenced by the virtio_balloon driver structure: one entry
 * for VIRTIO_ID_BALLOON with VIRTIO_DEV_ANY_ID in the second field
 * (presumably "any vendor" -- confirm against struct virtio_device_id),
 * followed by a zeroed entry. */
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
{ 0 },
};
/*
 * Virtqueue callback for both balloon queues.  Pulls the finished buffer off
 * the queue; its token is the struct virtio_balloon that tell_host() queued,
 * and completing ->acked releases the waiter there.  Does nothing when the
 * queue has no finished buffer.
 */
static void balloon_ack(struct virtqueue *vq)
{
	unsigned int used_len;
	struct virtio_balloon *vb = vq->vq_ops->get_buf(vq, &used_len);

	if (!vb)
		return;

	complete(&vb->acked);
}
/*
 * Hand the first vb->num_pfns entries of vb->pfns to the Host on @vq and
 * block until the Host has consumed them.
 *
 * @vb: balloon whose pfn array is sent
 * @vq: queue to send on (inflate or deflate)
 */
static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
{
	struct scatterlist sg;
	int rc;

	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);

	/* Re-arm the completion before the buffer can possibly be acked. */
	init_completion(&vb->acked);

	/* We should always be able to add one buffer to an empty queue. */
	rc = vq->vq_ops->add_buf(vq, &sg, 1, 0, vb);
	if (rc != 0)
		BUG();

	vq->vq_ops->kick(vq);

	/* When host has read buffer, this completes via balloon_ack */
	wait_for_completion(&vb->acked);
}
/*
 * Inflate the balloon by up to @num pages: allocate pages, remember them on
 * vb->pages, and report their pfns to the Host on the inflate queue.
 *
 * At most one pfn array's worth is handled per call.  If an allocation fails
 * the batch is cut short after a 200ms pause; whatever was gathered is still
 * reported.
 */
static void fill_balloon(struct virtio_balloon *vb, size_t num)
{
	/* We can only do one array worth at a time. */
	const size_t want = min(num, ARRAY_SIZE(vb->pfns));

	vb->num_pfns = 0;
	while (vb->num_pfns < want) {
		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY);

		if (!page) {
			if (printk_ratelimit())
				dev_printk(KERN_INFO, &vb->vdev->dev,
					   "Out of puff! Can't get %zu pages\n",
					   want);
			/* Sleep for at least 1/5 of a second before retry. */
			msleep(200);
			break;
		}

		vb->pfns[vb->num_pfns] = page_to_pfn(page);
		totalram_pages--;
		vb->num_pages++;
		list_add(&page->lru, &vb->pages);
		vb->num_pfns++;
	}

	/* Only bother the Host if we actually got something. */
	if (vb->num_pfns != 0)
		tell_host(vb, vb->inflate_vq);
}
/*
 * Give @num pages, identified by the pfns in @pfns, back to the page
 * allocator, bumping totalram_pages once per page.
 */
static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
{
	const u32 *pfn = pfns;
	const u32 *const end = pfns + num;

	while (pfn != end) {
		__free_page(pfn_to_page(*pfn));
		totalram_pages++;
		pfn++;
	}
}
/*
 * Deflate the balloon by up to @num pages: take pages off vb->pages, report
 * their pfns to the Host on the deflate queue and free them.
 *
 * At most one pfn array's worth is handled per call, and never more pages
 * than the balloon actually holds.  Whether the Host is told before or after
 * the pages are freed depends on vb->tell_host_first.
 */
static void leak_balloon(struct virtio_balloon *vb, size_t num)
{
	struct page *page;

	/* We can only do one array worth at a time. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) {
		/* The requested count derives from a Host-supplied target, so
		 * it can exceed what we hold.  list_first_entry() on an empty
		 * list would hand back a bogus page built from the list head
		 * itself, so stop as soon as the balloon is empty. */
		if (list_empty(&vb->pages))
			break;
		page = list_first_entry(&vb->pages, struct page, lru);
		list_del(&page->lru);
		vb->pfns[vb->num_pfns] = page_to_pfn(page);
		vb->num_pages--;
	}

	/* Nothing to give back: don't send the Host an empty buffer.  Back
	 * off like fill_balloon() does, so a target we can never reach does
	 * not turn the servicing thread into a busy loop. */
	if (vb->num_pfns == 0) {
		msleep(200);
		return;
	}

	if (vb->tell_host_first) {
		tell_host(vb, vb->deflate_vq);
		release_pages_by_pfn(vb->pfns, vb->num_pfns);
	} else {
		release_pages_by_pfn(vb->pfns, vb->num_pfns);
		tell_host(vb, vb->deflate_vq);
	}
}
/* config_changed hook of the virtio_balloon driver: wakes whoever sleeps on
 * config_change (the balloon() thread waits there) so the target is
 * re-evaluated. */
static void virtballoon_changed(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
wake_up(&vb->config_change);
}
static inline int towards_target(struct virtio_balloon *vb)
{
u32 v;
__virtio_config_val(vb->vdev,
offsetof(struct virtio_balloon_config, num_pages),
&v);
return v - vb->num_pages;
}
static void update_balloon_size(struct virtio_balloon *vb)
{
__le32 actual = cpu_to_le32(vb->num_pages);
vb->vdev->config->set(vb->vdev,
offsetof(struct virtio_balloon_config, actual),
&actual, sizeof(actual));
}
/*
 * Body of the balloon servicing thread (started from virtballoon_probe()).
 *
 * _vballoon is the struct virtio_balloon to service.  The thread loops until
 * kthread_should_stop() is true and then returns 0.
 */
static int balloon(void *_vballoon)
{
struct virtio_balloon *vb = _vballoon;
set_freezable();
while (!kthread_should_stop()) {
/* Assigned inside the wait condition below on every evaluation. */
int diff;
try_to_freeze();
/* Sleep until the balloon is off target or we are asked to stop;
 * virtballoon_changed() wakes this queue. */
wait_event_interruptible(vb->config_change,
(diff = towards_target(vb)) != 0
|| kthread_should_stop());
/* Positive: balloon too small, take pages.  Negative: give some back. */
if (diff > 0)
fill_balloon(vb, diff);
else if (diff < 0)
leak_balloon(vb, -diff);
/* Report the resulting size to the Host, whatever happened above. */
update_balloon_size(vb);
}
return 0;
}
/*
 * Bind to a balloon device: allocate the per-device state, find the inflate
 * and deflate virtqueues, query the MUST_TELL_HOST feature and start the
 * servicing thread.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired so
 * far is released again.
 */
static int virtballoon_probe(struct virtio_device *vdev)
{
	struct virtio_balloon *vb;
	int err;

	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
	if (!vb) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&vb->pages);
	vb->num_pages = 0;
	init_waitqueue_head(&vb->config_change);
	vb->vdev = vdev;

	/* We expect two virtqueues. */
	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
	if (IS_ERR(vb->inflate_vq)) {
		err = PTR_ERR(vb->inflate_vq);
		goto out_free_vb;
	}

	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
	if (IS_ERR(vb->deflate_vq)) {
		err = PTR_ERR(vb->deflate_vq);
		goto out_del_inflate_vq;
	}

	/* This must be settled before the thread exists: vb comes from
	 * kmalloc() and is not zeroed, and the thread may run (and reach
	 * leak_balloon(), which reads this flag) as soon as it is created. */
	vb->tell_host_first
		= vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);

	vb->thread = kthread_run(balloon, vb, "vballoon");
	if (IS_ERR(vb->thread)) {
		err = PTR_ERR(vb->thread);
		goto out_del_deflate_vq;
	}

	return 0;

out_del_deflate_vq:
	vdev->config->del_vq(vb->deflate_vq);
out_del_inflate_vq:
	vdev->config->del_vq(vb->inflate_vq);
out_free_vb:
	kfree(vb);
out:
	return err;
}
/*
 * Unbind from a balloon device: stop the servicing thread, hand every page
 * still in the balloon back, reset the device and release the queues and the
 * per-device state.
 */
static void virtballoon_remove(struct virtio_device *vdev)
{
	struct virtio_balloon *vballoon = vdev->priv;

	kthread_stop(vballoon->thread);

	/* There might be pages left in the balloon: free them.  Each call
	 * handles at most one pfn array's worth, hence the loop. */
	while (vballoon->num_pages != 0)
		leak_balloon(vballoon, vballoon->num_pages);

	/* Now we reset the device so we can clean up the queues. */
	vdev->config->reset(vdev);

	vdev->config->del_vq(vballoon->deflate_vq);
	vdev->config->del_vq(vballoon->inflate_vq);
	kfree(vballoon);
}
/* Driver description handed to register_virtio_driver() in init(): the
 * devices matched (id_table) and the probe, remove and config-change hooks
 * defined in this file. */
static struct virtio_driver virtio_balloon = {
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtballoon_probe,
.remove = __devexit_p(virtballoon_remove),
.config_changed = virtballoon_changed,
};
/*
 * Module load hook: register the balloon driver with the virtio core and
 * pass the registration result straight back to the caller.
 */
static int __init init(void)
{
	int err = register_virtio_driver(&virtio_balloon);

	return err;
}
/* Module unload hook: unregister the driver that init() registered. */
static void __exit fini(void)
{
unregister_virtio_driver(&virtio_balloon);
}
/* Hook init()/fini() up as this module's load and unload handlers. */
module_init(init);
module_exit(fini);
/* Publish the virtio IDs from id_table as part of the module's metadata. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio balloon driver");
MODULE_LICENSE("GPL");
This diff is collapsed.
...@@ -87,6 +87,8 @@ static int vring_add_buf(struct virtqueue *_vq, ...@@ -87,6 +87,8 @@ static int vring_add_buf(struct virtqueue *_vq,
if (vq->num_free < out + in) { if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n", pr_debug("Can't add buf len %i - avail = %i\n",
out + in, vq->num_free); out + in, vq->num_free);
/* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
vq->notify(&vq->vq);
END_USE(vq); END_USE(vq);
return -ENOSPC; return -ENOSPC;
} }
...@@ -97,16 +99,14 @@ static int vring_add_buf(struct virtqueue *_vq, ...@@ -97,16 +99,14 @@ static int vring_add_buf(struct virtqueue *_vq,
head = vq->free_head; head = vq->free_head;
for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT; vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) vq->vring.desc[i].addr = sg_phys(sg);
+ sg->offset;
vq->vring.desc[i].len = sg->length; vq->vring.desc[i].len = sg->length;
prev = i; prev = i;
sg++; sg++;
} }
for (; in; i = vq->vring.desc[i].next, in--) { for (; in; i = vq->vring.desc[i].next, in--) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) vq->vring.desc[i].addr = sg_phys(sg);
+ sg->offset;
vq->vring.desc[i].len = sg->length; vq->vring.desc[i].len = sg->length;
prev = i; prev = i;
sg++; sg++;
...@@ -171,16 +171,6 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) ...@@ -171,16 +171,6 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
vq->num_free++; vq->num_free++;
} }
/* FIXME: We need to tell other side about removal, to synchronize. */
static void vring_shutdown(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
unsigned int i;
for (i = 0; i < vq->vring.num; i++)
detach_buf(vq, i);
}
static inline bool more_used(const struct vring_virtqueue *vq) static inline bool more_used(const struct vring_virtqueue *vq)
{ {
return vq->last_used_idx != vq->vring.used->idx; return vq->last_used_idx != vq->vring.used->idx;
...@@ -220,7 +210,17 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) ...@@ -220,7 +210,17 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
return ret; return ret;
} }
static bool vring_restart(struct virtqueue *_vq) static void vring_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
START_USE(vq);
BUG_ON(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
END_USE(vq);
}
static bool vring_enable_cb(struct virtqueue *_vq)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
...@@ -253,26 +253,34 @@ irqreturn_t vring_interrupt(int irq, void *_vq) ...@@ -253,26 +253,34 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
if (unlikely(vq->broken)) if (unlikely(vq->broken))
return IRQ_HANDLED; return IRQ_HANDLED;
/* Other side may have missed us turning off the interrupt,
* but we should preserve disable semantic for virtio users. */
if (unlikely(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
pr_debug("virtqueue interrupt after disable for %p\n", vq);
return IRQ_HANDLED;
}
pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
if (vq->vq.callback && !vq->vq.callback(&vq->vq)) if (vq->vq.callback)
vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; vq->vq.callback(&vq->vq);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
EXPORT_SYMBOL_GPL(vring_interrupt);
static struct virtqueue_ops vring_vq_ops = { static struct virtqueue_ops vring_vq_ops = {
.add_buf = vring_add_buf, .add_buf = vring_add_buf,
.get_buf = vring_get_buf, .get_buf = vring_get_buf,
.kick = vring_kick, .kick = vring_kick,
.restart = vring_restart, .disable_cb = vring_disable_cb,
.shutdown = vring_shutdown, .enable_cb = vring_enable_cb,
}; };
struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtqueue *vring_new_virtqueue(unsigned int num,
struct virtio_device *vdev, struct virtio_device *vdev,
void *pages, void *pages,
void (*notify)(struct virtqueue *), void (*notify)(struct virtqueue *),
bool (*callback)(struct virtqueue *)) void (*callback)(struct virtqueue *))
{ {
struct vring_virtqueue *vq; struct vring_virtqueue *vq;
unsigned int i; unsigned int i;
...@@ -311,9 +319,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, ...@@ -311,9 +319,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
return &vq->vq; return &vq->vq;
} }
EXPORT_SYMBOL_GPL(vring_new_virtqueue);
void vring_del_virtqueue(struct virtqueue *vq) void vring_del_virtqueue(struct virtqueue *vq)
{ {
kfree(to_vvq(vq)); kfree(to_vvq(vq));
} }
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
MODULE_LICENSE("GPL");
...@@ -23,7 +23,12 @@ ...@@ -23,7 +23,12 @@
struct lguest_device_desc { struct lguest_device_desc {
/* The device type: console, network, disk etc. Type 0 terminates. */ /* The device type: console, network, disk etc. Type 0 terminates. */
__u8 type; __u8 type;
/* The number of bytes of the config array. */ /* The number of virtqueues (first in config array) */
__u8 num_vq;
/* The number of bytes of feature bits. Multiply by 2: one for host
* features and one for guest acknowledgements. */
__u8 feature_len;
/* The number of bytes of the config array after virtqueues. */
__u8 config_len; __u8 config_len;
/* A status byte, written by the Guest. */ /* A status byte, written by the Guest. */
__u8 status; __u8 status;
...@@ -31,7 +36,7 @@ struct lguest_device_desc { ...@@ -31,7 +36,7 @@ struct lguest_device_desc {
}; };
/*D:135 This is how we expect the device configuration field for a virtqueue /*D:135 This is how we expect the device configuration field for a virtqueue
* (type VIRTIO_CONFIG_F_VIRTQUEUE) to be laid out: */ * to be laid out in config space. */
struct lguest_vqconfig { struct lguest_vqconfig {
/* The number of entries in the virtio_ring */ /* The number of entries in the virtio_ring */
__u16 num; __u16 num;
......
...@@ -1810,5 +1810,6 @@ static inline void skb_forward_csum(struct sk_buff *skb) ...@@ -1810,5 +1810,6 @@ static inline void skb_forward_csum(struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
} }
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */ #endif /* _LINUX_SKBUFF_H */
...@@ -11,15 +11,13 @@ ...@@ -11,15 +11,13 @@
/** /**
* virtqueue - a queue to register buffers for sending or receiving. * virtqueue - a queue to register buffers for sending or receiving.
* @callback: the function to call when buffers are consumed (can be NULL). * @callback: the function to call when buffers are consumed (can be NULL).
* If this returns false, callbacks are suppressed until vq_ops->restart
* is called.
* @vdev: the virtio device this queue was created for. * @vdev: the virtio device this queue was created for.
* @vq_ops: the operations for this virtqueue (see below). * @vq_ops: the operations for this virtqueue (see below).
* @priv: a pointer for the virtqueue implementation to use. * @priv: a pointer for the virtqueue implementation to use.
*/ */
struct virtqueue struct virtqueue
{ {
bool (*callback)(struct virtqueue *vq); void (*callback)(struct virtqueue *vq);
struct virtio_device *vdev; struct virtio_device *vdev;
struct virtqueue_ops *vq_ops; struct virtqueue_ops *vq_ops;
void *priv; void *priv;
...@@ -41,13 +39,12 @@ struct virtqueue ...@@ -41,13 +39,12 @@ struct virtqueue
* vq: the struct virtqueue we're talking about. * vq: the struct virtqueue we're talking about.
* len: the length written into the buffer * len: the length written into the buffer
* Returns NULL or the "data" token handed to add_buf. * Returns NULL or the "data" token handed to add_buf.
* @restart: restart callbacks after callback returned false. * @disable_cb: disable callbacks
* vq: the struct virtqueue we're talking about.
* @enable_cb: restart callbacks after disable_cb.
* vq: the struct virtqueue we're talking about. * vq: the struct virtqueue we're talking about.
* This returns "false" (and doesn't re-enable) if there are pending * This returns "false" (and doesn't re-enable) if there are pending
* buffers in the queue, to avoid a race. * buffers in the queue, to avoid a race.
* @shutdown: "unadd" all buffers.
* vq: the struct virtqueue we're talking about.
* Remove everything from the queue.
* *
* Locking rules are straightforward: the driver is responsible for * Locking rules are straightforward: the driver is responsible for
* locking. No two operations may be invoked simultaneously. * locking. No two operations may be invoked simultaneously.
...@@ -65,9 +62,8 @@ struct virtqueue_ops { ...@@ -65,9 +62,8 @@ struct virtqueue_ops {
void *(*get_buf)(struct virtqueue *vq, unsigned int *len); void *(*get_buf)(struct virtqueue *vq, unsigned int *len);
bool (*restart)(struct virtqueue *vq); void (*disable_cb)(struct virtqueue *vq);
bool (*enable_cb)(struct virtqueue *vq);
void (*shutdown)(struct virtqueue *vq);
}; };
/** /**
...@@ -97,12 +93,15 @@ void unregister_virtio_device(struct virtio_device *dev); ...@@ -97,12 +93,15 @@ void unregister_virtio_device(struct virtio_device *dev);
* @probe: the function to call when a device is found. Returns a token for * @probe: the function to call when a device is found. Returns a token for
* remove, or PTR_ERR(). * remove, or PTR_ERR().
* @remove: the function when a device is removed. * @remove: the function when a device is removed.
* @config_changed: optional function to call when the device configuration
* changes; may be called in interrupt context.
*/ */
struct virtio_driver { struct virtio_driver {
struct device_driver driver; struct device_driver driver;
const struct virtio_device_id *id_table; const struct virtio_device_id *id_table;
int (*probe)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev);
void (*remove)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev);
void (*config_changed)(struct virtio_device *dev);
}; };
int register_virtio_driver(struct virtio_driver *drv); int register_virtio_driver(struct virtio_driver *drv);
......
#ifndef _LINUX_VIRTIO_BALLOON_H
#define _LINUX_VIRTIO_BALLOON_H
#include <linux/virtio_config.h>
/* The ID for virtio_balloon */
#define VIRTIO_ID_BALLOON 5
/* The feature bitmap for virtio balloon */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
/*
 * Balloon device configuration: two little-endian 32-bit page counts.
 * num_pages is the target set by the host; actual is the current size of
 * the balloon.
 * NOTE(review): presumably this is the layout read/written through the
 * virtio config get()/set() operations -- confirm against the driver.
 */
struct virtio_balloon_config
{
/* Number of pages host wants Guest to give up. */
__le32 num_pages;
/* Number of pages we've actually got in balloon. */
__le32 actual;
};
#endif /* _LINUX_VIRTIO_BALLOON_H */
...@@ -6,15 +6,19 @@ ...@@ -6,15 +6,19 @@
#define VIRTIO_ID_BLOCK 2 #define VIRTIO_ID_BLOCK 2
/* Feature bits */ /* Feature bits */
#define VIRTIO_CONFIG_BLK_F 0x40 #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
#define VIRTIO_BLK_F_BARRIER 1 /* Does host support barriers? */ #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
/* The capacity (in 512-byte sectors). */ struct virtio_blk_config
#define VIRTIO_CONFIG_BLK_F_CAPACITY 0x41 {
/* The maximum segment size. */ /* The capacity (in 512-byte sectors). */
#define VIRTIO_CONFIG_BLK_F_SIZE_MAX 0x42 __le64 capacity;
/* The maximum number of segments. */ /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */
#define VIRTIO_CONFIG_BLK_F_SEG_MAX 0x43 __le32 size_max;
/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
__le32 seg_max;
} __attribute__((packed));
/* These two define direction. */ /* These two define direction. */
#define VIRTIO_BLK_T_IN 0 #define VIRTIO_BLK_T_IN 0
...@@ -35,8 +39,6 @@ struct virtio_blk_outhdr ...@@ -35,8 +39,6 @@ struct virtio_blk_outhdr
__u32 ioprio; __u32 ioprio;
/* Sector (ie. 512 byte offset) */ /* Sector (ie. 512 byte offset) */
__u64 sector; __u64 sector;
/* Where to put reply. */
__u64 id;
}; };
#define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_OK 0
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* store and access that space differently. */ * store and access that space differently. */
#include <linux/types.h> #include <linux/types.h>
/* Status byte for guest to report progress, and synchronize config. */ /* Status byte for guest to report progress, and synchronize features. */
/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ /* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
/* We have found a driver for the device. */ /* We have found a driver for the device. */
...@@ -15,34 +15,27 @@ ...@@ -15,34 +15,27 @@
/* We've given up on this device. */ /* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80 #define VIRTIO_CONFIG_S_FAILED 0x80
/* Feature byte (actually 7 bits availabe): */
/* Requirements/features of the virtio implementation. */
#define VIRTIO_CONFIG_F_VIRTIO 1
/* Requirements/features of the virtqueue (may have more than one). */
#define VIRTIO_CONFIG_F_VIRTQUEUE 2
#ifdef __KERNEL__ #ifdef __KERNEL__
struct virtio_device; struct virtio_device;
/** /**
* virtio_config_ops - operations for configuring a virtio device * virtio_config_ops - operations for configuring a virtio device
* @find: search for the next configuration field of the given type. * @feature: search for a feature in this config
* vdev: the virtio_device * vdev: the virtio_device
* type: the feature type * bit: the feature bit
* len: the (returned) length of the field if found. * Returns true if the feature is supported. Acknowledges the feature
* Returns a token if found, or NULL. Never returnes the same field twice * so the host can see it.
* (ie. it's used up). * @get: read the value of a configuration field
* @get: read the value of a configuration field after find().
* vdev: the virtio_device * vdev: the virtio_device
* token: the token returned from find(). * offset: the offset of the configuration field
* buf: the buffer to write the field value into. * buf: the buffer to write the field value into.
* len: the length of the buffer (given by find()). * len: the length of the buffer
* Note that contents are conventionally little-endian. * Note that contents are conventionally little-endian.
* @set: write the value of a configuration field after find(). * @set: write the value of a configuration field
* vdev: the virtio_device * vdev: the virtio_device
* token: the token returned from find(). * offset: the offset of the configuration field
* buf: the buffer to read the field value from. * buf: the buffer to read the field value from.
* len: the length of the buffer (given by find()). * len: the length of the buffer
* Note that contents are conventionally little-endian. * Note that contents are conventionally little-endian.
* @get_status: read the status byte * @get_status: read the status byte
* vdev: the virtio_device * vdev: the virtio_device
...@@ -50,62 +43,67 @@ struct virtio_device; ...@@ -50,62 +43,67 @@ struct virtio_device;
* @set_status: write the status byte * @set_status: write the status byte
* vdev: the virtio_device * vdev: the virtio_device
* status: the new status byte * status: the new status byte
* @find_vq: find the first VIRTIO_CONFIG_F_VIRTQUEUE and create a virtqueue. * @reset: reset the device
* vdev: the virtio device
* After this, status and feature negotiation must be done again
* @find_vq: find a virtqueue and instantiate it.
* vdev: the virtio_device * vdev: the virtio_device
* index: the 0-based virtqueue number in case there's more than one.
* callback: the virtqueue callback * callback: the virtqueue callback
* Returns the new virtqueue or ERR_PTR(). * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
* @del_vq: free a virtqueue found by find_vq(). * @del_vq: free a virtqueue found by find_vq().
*/ */
struct virtio_config_ops struct virtio_config_ops
{ {
void *(*find)(struct virtio_device *vdev, u8 type, unsigned *len); bool (*feature)(struct virtio_device *vdev, unsigned bit);
void (*get)(struct virtio_device *vdev, void *token, void (*get)(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len); void *buf, unsigned len);
void (*set)(struct virtio_device *vdev, void *token, void (*set)(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len); const void *buf, unsigned len);
u8 (*get_status)(struct virtio_device *vdev); u8 (*get_status)(struct virtio_device *vdev);
void (*set_status)(struct virtio_device *vdev, u8 status); void (*set_status)(struct virtio_device *vdev, u8 status);
void (*reset)(struct virtio_device *vdev);
struct virtqueue *(*find_vq)(struct virtio_device *vdev, struct virtqueue *(*find_vq)(struct virtio_device *vdev,
bool (*callback)(struct virtqueue *)); unsigned index,
void (*callback)(struct virtqueue *));
void (*del_vq)(struct virtqueue *vq); void (*del_vq)(struct virtqueue *vq);
}; };
/** /**
* virtio_config_val - get a single virtio config and mark it used. * virtio_config_val - look for a feature and get a single virtio config.
* @config: the virtio config space * @vdev: the virtio device
* @type: the type to search for. * @fbit: the feature bit
* @offset: the offset of the configuration field.
* @val: a pointer to the value to fill in. * @val: a pointer to the value to fill in.
* *
* Once used, the config type is marked with VIRTIO_CONFIG_F_USED so it can't * The return value is -ENOENT if the feature doesn't exist. Otherwise
* be found again. This version does endian conversion. */ * the value is endian-corrected and returned in v. */
#define virtio_config_val(vdev, type, v) ({ \ #define virtio_config_val(vdev, fbit, offset, v) ({ \
int _err = __virtio_config_val((vdev),(type),(v),sizeof(*(v))); \ int _err; \
\ if ((vdev)->config->feature((vdev), (fbit))) { \
BUILD_BUG_ON(sizeof(*(v)) != 1 && sizeof(*(v)) != 2 \ __virtio_config_val((vdev), (offset), (v)); \
&& sizeof(*(v)) != 4 && sizeof(*(v)) != 8); \ _err = 0; \
if (!_err) { \ } else \
switch (sizeof(*(v))) { \ _err = -ENOENT; \
case 2: le16_to_cpus((__u16 *) v); break; \
case 4: le32_to_cpus((__u32 *) v); break; \
case 8: le64_to_cpus((__u64 *) v); break; \
} \
} \
_err; \ _err; \
}) })
int __virtio_config_val(struct virtio_device *dev,
u8 type, void *val, size_t size);
/** /**
* virtio_use_bit - helper to use a feature bit in a bitfield value. * __virtio_config_val - get a single virtio config without feature check.
* @dev: the virtio device * @vdev: the virtio device
* @token: the token as returned from vdev->config->find(). * @offset: the offset of the configuration field.
* @len: the length of the field. * @val: a pointer to the value to fill in.
* @bitnum: the bit to test.
* *
* If handed a NULL token, it returns false, otherwise returns bit status. * The value is endian-corrected and returned in v. */
* If it's one, it sets the mirroring acknowledgement bit. */ #define __virtio_config_val(vdev, offset, v) do { \
int virtio_use_bit(struct virtio_device *vdev, BUILD_BUG_ON(sizeof(*(v)) != 1 && sizeof(*(v)) != 2 \
void *token, unsigned int len, unsigned int bitnum); && sizeof(*(v)) != 4 && sizeof(*(v)) != 8); \
(vdev)->config->get((vdev), (offset), (v), sizeof(*(v))); \
switch (sizeof(*(v))) { \
case 2: le16_to_cpus((__u16 *) v); break; \
case 4: le32_to_cpus((__u32 *) v); break; \
case 8: le64_to_cpus((__u64 *) v); break; \
} \
} while(0)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_VIRTIO_CONFIG_H */ #endif /* _LINUX_VIRTIO_CONFIG_H */
...@@ -5,32 +5,32 @@ ...@@ -5,32 +5,32 @@
/* The ID for virtio_net */ /* The ID for virtio_net */
#define VIRTIO_ID_NET 1 #define VIRTIO_ID_NET 1
/* The bitmap of config for virtio net */ /* The feature bitmap for virtio net */
#define VIRTIO_CONFIG_NET_F 0x40 #define VIRTIO_NET_F_CSUM 0 /* Can handle pkts w/ partial csum */
#define VIRTIO_NET_F_NO_CSUM 0 #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
#define VIRTIO_NET_F_TSO4 1 #define VIRTIO_NET_F_GSO 6 /* Can handle pkts w/ any GSO type */
#define VIRTIO_NET_F_UFO 2
#define VIRTIO_NET_F_TSO4_ECN 3
#define VIRTIO_NET_F_TSO6 4
/* The config defining mac address. */ struct virtio_net_config
#define VIRTIO_CONFIG_NET_MAC_F 0x41 {
/* The config defining mac address (if VIRTIO_NET_F_MAC) */
__u8 mac[6];
} __attribute__((packed));
/* This is the first element of the scatter-gather list. If you don't /* This is the first element of the scatter-gather list. If you don't
* specify GSO or CSUM features, you can simply ignore the header. */ * specify GSO or CSUM features, you can simply ignore the header. */
struct virtio_net_hdr struct virtio_net_hdr
{ {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
__u8 flags; __u8 flags;
#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
/* FIXME: Do we need this? If they said they can handle ECN, do they care? */
#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN
#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
__u8 gso_type; #define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
__u16 gso_size; __u8 gso_type;
__u16 csum_start; __u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
__u16 csum_offset; __u16 gso_size; /* Bytes to append to hdr_len per frame */
__u16 csum_start; /* Position to start checksumming from */
__u16 csum_offset; /* Offset after that to place checksum */
}; };
#endif /* _LINUX_VIRTIO_NET_H */ #endif /* _LINUX_VIRTIO_NET_H */
/*
* Virtio PCI driver
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#ifndef _LINUX_VIRTIO_PCI_H
#define _LINUX_VIRTIO_PCI_H
#include <linux/virtio_config.h>
/*
 * Register map.  The constants up to VIRTIO_PCI_CONFIG are offsets; going
 * by the widths stated below, the registers are packed back to back
 * (0, 4, 8, 12, 14, 16, 18, 19) and the per-driver area starts at 20.
 * VIRTIO_PCI_ISR_CONFIG and VIRTIO_PCI_ABI_VERSION are values, not offsets.
 */
/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES 0
/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES 4
/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN 8
/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM 12
/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL 14
/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY 16
/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS 18
/* An 8-bit r/o interrupt status register. Reading the value will return the
 * current contents of the ISR and will also clear it. This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR 19
/* The bit of the ISR which indicates a device configuration change. */
#define VIRTIO_PCI_ISR_CONFIG 0x2
/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG 20
/* Virtio ABI version, this must match exactly */
/* NOTE(review): per the commit summary this is carried in the PCI revision
 * field -- confirm where the comparison is made. */
#define VIRTIO_PCI_ABI_VERSION 0
#endif
...@@ -15,9 +15,13 @@ ...@@ -15,9 +15,13 @@
/* This marks a buffer as write-only (otherwise read-only). */ /* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE 2 #define VRING_DESC_F_WRITE 2
/* This means don't notify other side when buffer added. */ /* The Host uses this in used->flags to advise the Guest: don't kick me when
* you add a buffer. It's unreliable, so it's simply an optimization. Guest
* will still kick if it's out of buffers. */
#define VRING_USED_F_NO_NOTIFY 1 #define VRING_USED_F_NO_NOTIFY 1
/* This means don't interrupt guest when buffer consumed. */ /* The Guest uses this in avail->flags to advise the Host: don't interrupt me
* when you consume a buffer. It's unreliable, so it's simply an
* optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT 1 #define VRING_AVAIL_F_NO_INTERRUPT 1
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
...@@ -89,7 +93,7 @@ struct vring { ...@@ -89,7 +93,7 @@ struct vring {
* }; * };
*/ */
static inline void vring_init(struct vring *vr, unsigned int num, void *p, static inline void vring_init(struct vring *vr, unsigned int num, void *p,
unsigned int pagesize) unsigned long pagesize)
{ {
vr->num = num; vr->num = num;
vr->desc = p; vr->desc = p;
...@@ -98,7 +102,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, ...@@ -98,7 +102,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
& ~(pagesize - 1)); & ~(pagesize - 1));
} }
static inline unsigned vring_size(unsigned int num, unsigned int pagesize) static inline unsigned vring_size(unsigned int num, unsigned long pagesize)
{ {
return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
+ pagesize - 1) & ~(pagesize - 1)) + pagesize - 1) & ~(pagesize - 1))
...@@ -114,7 +118,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, ...@@ -114,7 +118,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
struct virtio_device *vdev, struct virtio_device *vdev,
void *pages, void *pages,
void (*notify)(struct virtqueue *vq), void (*notify)(struct virtqueue *vq),
bool (*callback)(struct virtqueue *vq)); void (*callback)(struct virtqueue *vq));
void vring_del_virtqueue(struct virtqueue *vq); void vring_del_virtqueue(struct virtqueue *vq);
irqreturn_t vring_interrupt(int irq, void *_vq); irqreturn_t vring_interrupt(int irq, void *_vq);
......
...@@ -199,14 +199,12 @@ static void p9_virtio_close(struct p9_trans *trans) ...@@ -199,14 +199,12 @@ static void p9_virtio_close(struct p9_trans *trans)
kfree(trans); kfree(trans);
} }
static bool p9_virtio_intr(struct virtqueue *q) static void p9_virtio_intr(struct virtqueue *q)
{ {
struct virtio_chan *chan = q->vdev->priv; struct virtio_chan *chan = q->vdev->priv;
P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq); P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq);
wake_up_interruptible(&chan->wq); wake_up_interruptible(&chan->wq);
return true;
} }
static int p9_virtio_probe(struct virtio_device *dev) static int p9_virtio_probe(struct virtio_device *dev)
...@@ -236,13 +234,13 @@ static int p9_virtio_probe(struct virtio_device *dev) ...@@ -236,13 +234,13 @@ static int p9_virtio_probe(struct virtio_device *dev)
/* Find the input queue. */ /* Find the input queue. */
dev->priv = chan; dev->priv = chan;
chan->in_vq = dev->config->find_vq(dev, p9_virtio_intr); chan->in_vq = dev->config->find_vq(dev, 0, p9_virtio_intr);
if (IS_ERR(chan->in_vq)) { if (IS_ERR(chan->in_vq)) {
err = PTR_ERR(chan->in_vq); err = PTR_ERR(chan->in_vq);
goto free; goto free;
} }
chan->out_vq = dev->config->find_vq(dev, NULL); chan->out_vq = dev->config->find_vq(dev, 1, NULL);
if (IS_ERR(chan->out_vq)) { if (IS_ERR(chan->out_vq)) {
err = PTR_ERR(chan->out_vq); err = PTR_ERR(chan->out_vq);
goto free_in_vq; goto free_in_vq;
......
...@@ -2461,6 +2461,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) ...@@ -2461,6 +2461,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return elt; return elt;
} }
/**
 * skb_partial_csum_set - set up and verify partial csum values for packet
 * @skb: the skb to set
 * @start: the number of bytes after skb->data to start checksumming.
 * @off: the offset from start to place the checksum.
 *
 * For untrusted partially-checksummed packets, we need to make sure the values
 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
 *
 * The 2-byte checksum field at @start + @off must fit within skb->len bytes,
 * so packets shorter than two bytes are always rejected.
 *
 * This function checks and sets those values and skb->ip_summed: if this
 * returns false you should drop the packet.  In that case a rate-limited
 * warning is logged and the skb is left unmodified.
 */
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
	/*
	 * One past the last byte of the checksum field.  Both inputs are u16,
	 * so the sum cannot overflow an unsigned int.
	 *
	 * Compare the end against skb->len instead of computing
	 * "skb->len - 2": with an unsigned length (it is printed with %u
	 * below) that subtraction wraps for skbs shorter than two bytes and
	 * would let any start/off pair through.
	 */
	unsigned int csum_end = (unsigned int)start + off + 2;

	if (unlikely(csum_end > skb->len)) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "bad partial csum: csum=%u/%u len=%u\n",
			       start, off, skb->len);
		return false;
	}
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) + start;
	skb->csum_offset = off;
	return true;
}
EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(kfree_skb);
...@@ -2497,3 +2525,4 @@ EXPORT_SYMBOL(skb_append_datato_frags); ...@@ -2497,3 +2525,4 @@ EXPORT_SYMBOL(skb_append_datato_frags);
EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_to_sgvec);
EXPORT_SYMBOL_GPL(skb_cow_data); EXPORT_SYMBOL_GPL(skb_cow_data);
EXPORT_SYMBOL_GPL(skb_partial_csum_set);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment