Google
  Web www.spinics.net

[PATCH 1/2][RFC][v2] kvm: Batch writes to MMIO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


This patch is the kernel part of the "batch writes to MMIO" patch.

It introduces the ioctl interface used to define the MMIO zones whose writes may be delayed.
Inside a zone, we can define sub-parts whose writes must not be delayed.

If an MMIO write can be delayed, it is stored in a ring buffer which is common to all VCPUs.

Signed-off-by: Laurent Vivier <Laurent.Vivier@xxxxxxxx>
---
 arch/x86/kvm/x86.c         |  172 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm.h      |    7 ++
 include/asm-x86/kvm_host.h |   23 ++++++
 include/linux/kvm.h        |   16 ++++
 virt/kvm/kvm_main.c        |    3 +
 5 files changed, 221 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..930986b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1518,6 +1518,103 @@ out:
 	return r;
 }
 
+/* Return the delayed zone that fully contains [addr, addr + size), or NULL.
+ * Containment is tested on offsets so that neither sum can wrap around.
+ */
+static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
+							u64 addr, u32 size)
+{
+	struct kvm_delayed_mmio_zone *zone;
+	int i;
+
+	for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
+		zone = &kvm->arch.mmio_zone[i];
+
+		if (addr >= zone->addr && size <= zone->size &&
+		    addr - zone->addr <= zone->size - size)
+			return zone;
+	}
+
+	return NULL;
+}
+
+/* Return the first excluded range of @zone overlapping [addr, addr + size),
+ * or NULL.  @addr is absolute; callers pass an address inside @zone.
+ * Any overlap counts, including an access covering a whole excluded range.
+ */
+static struct kvm_excluded_mmio_zone *
+kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
+{
+	struct kvm_excluded_mmio_zone *excluded;
+	int i;
+
+	addr -= zone->addr;	/* work with offsets relative to the zone */
+	for (i = 0; i < zone->nb_excluded_zones; i++) {
+		excluded = &zone->excluded[i];
+		if (excluded->offset < addr + size &&
+		    addr < (u64)excluded->offset + excluded->size)
+			return excluded;
+	}
+	return NULL;
+}
+
+/* Non-zero iff the access lies in a delayed zone and hits no exclusion. */
+static int kvm_is_delayed_mmio(struct kvm *kvm, u64 addr, u32 size)
+{
+	struct kvm_delayed_mmio_zone *z = kvm_mmio_find_zone(kvm, addr, size);
+
+	/* outside every delayed zone: must be handled immediately */
+	if (!z)
+		return 0;
+
+	/* inside a zone: delay unless an excluded sub-range is touched */
+	return !kvm_mmio_find_excluded(z, addr, size);
+}
+
+/* KVM_SET_MMIO: register a delayed zone (is_delayed != 0) or exclude a part
+ * of a registered zone (is_delayed == 0).  Returns 0 on success or if the
+ * range is already known, -EINVAL for a bad range, -ENOMEM if a table is full.
+ */
+static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
+				 struct kvm_mmio_zone *zone)
+{
+	struct kvm_delayed_mmio_zone *z;
+	u64 last = zone->addr + zone->size - 1;	/* last byte of the range */
+
+	/* userspace input: refuse an empty range or one that wraps around */
+	if (zone->size == 0 || last < zone->addr)
+		return -EINVAL;
+
+	if (zone->is_delayed) {
+		if (kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
+			return -ENOMEM;
+		/* already defined ? */
+		if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
+		    kvm_mmio_find_zone(kvm, last, 1))
+			return 0;
+		z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
+		z->addr = zone->addr;
+		z->size = zone->size;
+		kvm->arch.nb_mmio_zones++;
+		return 0;
+	}
+
+	/* exclude some parts of the delayed MMIO zone */
+	z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
+	if (z == NULL)
+		return -EINVAL;
+	if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
+		return -ENOMEM;
+	if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
+	    kvm_mmio_find_excluded(z, last, 1))
+		return 0;
+
+	z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
+	z->excluded[z->nb_excluded_zones].size = zone->size;
+	z->nb_excluded_zones++;
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -1671,6 +1768,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_SET_MMIO: {
+		struct kvm_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_set_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
@@ -2706,6 +2815,52 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
 }
 
+/* Try to queue the pending MMIO write of @vcpu in the VM-wide batch ring.
+ * Returns 1 if the write was queued, 0 if it has to be handled right away
+ * (address not delayable, ring full, or ring index out of range).
+ */
+static int batch_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_batch *batch = vcpu->kvm->arch.batch;
+	spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
+	u32 last, next;
+
+	/* check if this MMIO can be delayed */
+	if (!kvm_is_delayed_mmio(vcpu->kvm,
+				 vcpu->mmio_phys_addr, vcpu->mmio_size))
+		return 0;
+
+	spin_lock(lock);
+
+	/* The ring page is also mapped by userspace (KVM_MMIO_PAGE_OFFSET),
+	 * so "last" is untrusted: read it exactly once and never use it as
+	 * an index unless it is inside the ring.
+	 */
+	last = *(volatile __u32 *)&batch->last;
+	if (last >= KVM_MAX_BATCH) {
+		spin_unlock(lock);
+		return 0;
+	}
+
+	/* last is the first free entry and one entry always stays unused:
+	 * the ring is full when the next slot is the first used entry.
+	 * "first" can only increase, so at worst we see a false "full".
+	 */
+	next = (last + 1) % KVM_MAX_BATCH;
+	if (next == batch->first) {
+		spin_unlock(lock);
+		return 0;
+	}
+
+	/* copy data in first free entry of the ring */
+	batch->mmio[last].phys_addr = vcpu->mmio_phys_addr;
+	batch->mmio[last].len = vcpu->mmio_size;
+	memcpy(batch->mmio[last].data, vcpu->mmio_data, vcpu->mmio_size);
+	batch->last = next;
+	spin_unlock(lock);
+	return 1;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -2857,6 +3012,11 @@ again:
 			goto again;
 	}
 
+	if (!r &&
+	    vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
+	    && !need_resched() && batch_mmio(vcpu))
+		goto again;
+
 out:
 	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
@@ -3856,12 +4016,22 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 struct  kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	struct page *page;
 
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 
+	/* MMIO batch ring: the zeroed page gives first == last == 0 (empty) */
+	spin_lock_init(&kvm->arch.batch_lock);
+	kvm->arch.batch = page_address(page);
 	return kvm;
 }
 
@@ -3902,6 +4072,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	if (kvm->arch.batch)
+		free_page((unsigned long)kvm->arch.batch);
 	kfree(kvm);
 }
 
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..3c4a611 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,6 +209,13 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+struct kvm_mmio_zone {	/* argument of the KVM_SET_MMIO ioctl */
+	__u8 is_delayed;	/* !0: define a delayed zone, 0: exclude a range of one */
+	__u8 pad[3];		/* not used by the code in this patch */
+	__u32 size;		/* length of the range */
+	__u64 addr;		/* start address of the range (compared to MMIO phys addrs) */
+};
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1466c3f..df42cdb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -293,6 +294,21 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
+#define KVM_MAX_DELAYED_MMIO_ZONE 10	/* entries in kvm_arch.mmio_zone[] */
+#define KVM_MAX_EXCLUDED_MMIO_ZONE 10	/* excluded ranges per delayed zone */
+
+struct kvm_excluded_mmio_zone {	/* part of a delayed zone that is not delayed */
+	u32 offset;	/* start, relative to the enclosing zone's addr */
+	u32 size;	/* length of the excluded range */
+};
+
+struct kvm_delayed_mmio_zone {	/* address range whose MMIO writes may be batched */
+	u64 addr;		/* start address of the zone */
+	u32 size;		/* length of the zone */
+	u32 nb_excluded_zones;	/* number of entries used in excluded[] */
+	struct kvm_excluded_mmio_zone excluded[KVM_MAX_EXCLUDED_MMIO_ZONE];
+};
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -317,6 +333,13 @@ struct kvm_arch{
 
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
+
+	/* MMIO batch */
+
+	spinlock_t batch_lock;
+	struct kvm_batch *batch;
+	int nb_mmio_zones;
+	struct kvm_delayed_mmio_zone mmio_zone[KVM_MAX_DELAYED_MMIO_ZONE];
 };
 
 struct kvm_vm_stat {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..b57010d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,21 @@ struct kvm_run {
 	};
 };
 
+struct kvm_mmio {	/* one delayed MMIO write, as stored in the batch ring */
+	__u64 phys_addr;	/* address of the write (copied from mmio_phys_addr) */
+	__u32 len;		/* number of valid bytes in data[] */
+	__u32 pad;		/* not used by the code in this patch */
+	__u8  data[8];		/* bytes written */
+};
+
+struct kvm_batch {	/* header of the one-page ring of delayed MMIO writes */
+	__u32 first, last;	/* first used entry, first free entry */
+	struct kvm_mmio mmio[0];	/* ring entries, KVM_MAX_BATCH of them */
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+						sizeof(struct kvm_mmio))
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
@@ -371,6 +386,7 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
 #define KVM_GET_PIT		  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT		  _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_SET_MMIO		  _IOW(KVMIO,  0x67, struct kvm_mmio_zone)
 
 /*
  * ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64ed402..c8f1bdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_X86
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
+	else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->arch.batch);
 #endif
 	else
 		return VM_FAULT_SIGBUS;
@@ -1230,6 +1232,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = PAGE_SIZE;     /* struct kvm_run */
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
+		r += PAGE_SIZE;    /* mmio batch page */
 #endif
 		break;
 	case KVM_TRACE_ENABLE:
-- 
1.5.2.4


-------------------------------------------------------------------------
 
 

_______________________________________________
kvm-devel mailing list
kvm-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/kvm-devel

[Site Home]     [Netdev]     [Ethernet Bridging]     [Linux Virtualization]     [LVS Devel]     [Linux Wireless]     [Kernel Newbies]     [Memory]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Rubini]     [100% Free Internet Dating]     [Photo]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]     [Video 4 Linux]     [Linux Resources]

Powered by Linux