Re: dynamic virtio page size

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2008-11-06 at 17:27 -0600, Hollis Blanchard wrote:
> 
> In the patch I sent, they are named VIRTIO_PFN_SHIFT and
> VRING_PAGE_SIZE. In fact, the first could really be named
> VIRTIO_PCI_PFN_SHIFT or even more specific, which hopefully would
> alleviate the confusion.
> 
> Anyways, I've been trying to implement your other idea, which was to
> advertise a "virtio page size" from the host to the guest. Since the
> virtio IO space is only 20 bytes long, and it's full, we need to offer a
> feature bit to notify the guest that the IO space has been expanded. 
> 
>        virtio's PCI IO resource
> ------------------------------------
> |    20 bytes    |                 |
> ------------------------------------
>    virtio-pci       virtio-<device>
>     specific          specific
>       data              data
> 
> If guests don't acknowledge the feature, the host pretends the
> VIRTIO_PAGE_SIZE register (which would be the new IO offset 20) doesn't
> exist, and reads to offset 20 instead return the device-specific config
> data.

FWIW, here's my current (broken) patch. Aside from being broken, it also
doesn't seem like this approach will scale well should we need to extend
the IO space again in the future.


diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -45,7 +45,11 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR			19
 
-#define VIRTIO_PCI_CONFIG		20
+/* A 32-bit r/o register specifying the size of a "page", in bytes, in the
+ * virtio interface. */
+#define VIRTIO_PCI_PAGESIZE		20
+
+#define VIRTIO_PCI_CONFIG		24
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION		0
@@ -55,6 +59,12 @@
  * KVM or if kqemu gets SMP support.
  */
 #define wmb() do { } while (0)
+
+#define VIRTIO_PAGE_SHIFT		12
+#define VIRTIO_PAGE_SIZE		(1<<VIRTIO_PAGE_SHIFT)
+#define VIRTIO_PAGE_MASK		(~(VIRTIO_PAGE_SIZE - 1))
+#define VIRTIO_PAGE_ALIGN(x)		(((x) + VIRTIO_PAGE_SIZE - 1) \
+					 & VIRTIO_PAGE_MASK)
 
 /* virt queue functions */
 
@@ -95,7 +105,7 @@ static void *virtio_map_gpa(target_phys_
 
 static size_t virtqueue_size(int num)
 {
-    return TARGET_PAGE_ALIGN((sizeof(VRingDesc) * num) +
+    return VIRTIO_PAGE_ALIGN((sizeof(VRingDesc) * num) +
 			     (sizeof(VRingAvail) + sizeof(uint16_t) * num)) +
 	(sizeof(VRingUsed) + sizeof(VRingUsedElem) * num);
 }
@@ -104,7 +114,7 @@ static void virtqueue_init(VirtQueue *vq
 {
     vq->vring.desc = p;
     vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
-    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
+    vq->vring.used = (void *)VIRTIO_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
 }
 
 static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
@@ -234,6 +244,9 @@ static uint32_t virtio_config_readb(void
 
     vdev->get_config(vdev, vdev->config);
 
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
+
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
 	return (uint32_t)-1;
@@ -248,6 +261,9 @@ static uint32_t virtio_config_readw(void
     uint16_t val;
 
     vdev->get_config(vdev, vdev->config);
+
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
 
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
@@ -264,6 +280,9 @@ static uint32_t virtio_config_readl(void
 
     vdev->get_config(vdev, vdev->config);
 
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
+
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
 	return (uint32_t)-1;
@@ -276,6 +295,9 @@ static void virtio_config_writeb(void *o
 {
     VirtIODevice *vdev = opaque;
     uint8_t val = data;
+
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
 
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
@@ -292,6 +314,9 @@ static void virtio_config_writew(void *o
     VirtIODevice *vdev = opaque;
     uint16_t val = data;
 
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
+
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
 	return;
@@ -306,6 +331,9 @@ static void virtio_config_writel(void *o
 {
     VirtIODevice *vdev = opaque;
     uint32_t val = data;
+
+    if (!(vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE))
+	addr += 4;
 
     addr -= vdev->addr + VIRTIO_PCI_CONFIG;
     if (addr > (vdev->config_len - sizeof(val)))
@@ -329,9 +357,10 @@ static void virtio_ioport_write(void *op
 	if (vdev->set_features)
 	    vdev->set_features(vdev, val);
 	vdev->features = val;
+	printf("%s: features 0x%x\n", __func__, val);
 	break;
     case VIRTIO_PCI_QUEUE_PFN:
-	pa = (ram_addr_t)val << TARGET_PAGE_BITS;
+	pa = (ram_addr_t)val << VIRTIO_PAGE_SHIFT;
 	vdev->vq[vdev->queue_sel].pfn = val;
 	if (pa == 0) {
             virtio_reset(vdev);
@@ -368,6 +397,7 @@ static uint32_t virtio_ioport_read(void 
     case VIRTIO_PCI_HOST_FEATURES:
 	ret = vdev->get_features(vdev);
 	ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
+	ret |= (1 << VIRTIO_F_EXPLICIT_PAGE_SIZE);
 	break;
     case VIRTIO_PCI_GUEST_FEATURES:
 	ret = vdev->features;
@@ -390,6 +420,16 @@ static uint32_t virtio_ioport_read(void 
 	vdev->isr = 0;
 	virtio_update_irq(vdev);
 	break;
+    case VIRTIO_PCI_PAGESIZE:
+	if (vdev->features & VIRTIO_F_EXPLICIT_PAGE_SIZE) {
+	    /* Guest must have acknowledged this feature in order to enable
+	     * this IO port. */
+	    ret = VIRTIO_PAGE_SIZE;
+	} else {
+	    /* If it hasn't, return device config space instead. */
+	    ret = virtio_config_readl(vdev, addr);
+	}
+	break;
     default:
 	break;
     }
@@ -405,22 +445,24 @@ static void virtio_map(PCIDevice *pci_de
 
     vdev->addr = addr;
     for (i = 0; i < 3; i++) {
-	register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
-	register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
+	register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
+	                      virtio_ioport_write, vdev);
+	register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
+	                     virtio_ioport_read, vdev);
     }
 
     if (vdev->config_len) {
-	register_ioport_write(addr + 20, vdev->config_len, 1,
+	register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
 			      virtio_config_writeb, vdev);
-	register_ioport_write(addr + 20, vdev->config_len, 2,
+	register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
 			      virtio_config_writew, vdev);
-	register_ioport_write(addr + 20, vdev->config_len, 4,
+	register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
 			      virtio_config_writel, vdev);
-	register_ioport_read(addr + 20, vdev->config_len, 1,
+	register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
 			     virtio_config_readb, vdev);
-	register_ioport_read(addr + 20, vdev->config_len, 2,
+	register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
 			     virtio_config_readw, vdev);
-	register_ioport_read(addr + 20, vdev->config_len, 4,
+	register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
 			     virtio_config_readl, vdev);
 
 	vdev->get_config(vdev, vdev->config);
@@ -519,7 +561,7 @@ void virtio_load(VirtIODevice *vdev, QEM
 	    size_t size;
 	    target_phys_addr_t pa;
 
-	    pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
+	    pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PAGE_SHIFT;
 	    size = virtqueue_size(vdev->vq[i].vring.num);
 	    virtqueue_init(&vdev->vq[i], virtio_map_gpa(pa, size));
 	}
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -33,6 +33,9 @@
 /* We notify when the ring is completely used, even if the guest is supressing
  * callbacks */
 #define VIRTIO_F_NOTIFY_ON_EMPTY        24
+
+/* Device advertises the size of a "page". */
+#define VIRTIO_F_EXPLICIT_PAGE_SIZE	28
 
 /* from Linux's linux/virtio_ring.h */
 


-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line "unsubscribe kvm-ia64" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux KVM Devel]     [Linux Virtualization]     [Big List of Linux Books]     [Linux SCSI]     [Yosemite Forum]

  Powered by Linux