[PATCH 1/1] Enable a guest to access a device's memory-mapped I/O regions directly.

From: Ben-Ami Yassour <benami@xxxxxxxxxx>

Signed-off-by: Ben-Ami Yassour <benami@xxxxxxxxxx>
Signed-off-by: Muli Ben-Yehuda <muli@xxxxxxxxxx>
---
 arch/x86/kvm/mmu.c         |   59 +++++++++++++++++++++++++++++--------------
 arch/x86/kvm/paging_tmpl.h |   19 +++++++++----
 include/linux/kvm_host.h   |    2 +-
 virt/kvm/kvm_main.c        |   17 +++++++++++-
 4 files changed, 69 insertions(+), 28 deletions(-)
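
Note (after the ---, not part of the commit message): the patch detects guest
frames that are backed by a VM_IO vma on the host (for example a device BAR
mmap()ed into the userspace process), computes the host pfn directly from the
vma offset, and marks the resulting shadow PTE with PT_SHADOW_IO_MARK so that
rmap tracking and struct-page reference counting are skipped for it.  A
minimal sketch of the address arithmetic used by the new VM_IO branch of
gfn_to_pfn(); the helper name is hypothetical and only illustrates the
computation:

	#include <linux/mm.h>	/* struct vm_area_struct, PAGE_SHIFT */

	/*
	 * Sketch only: translate a host virtual address that falls inside
	 * a VM_IO vma into a host page frame number, mirroring the VM_IO
	 * branch added to gfn_to_pfn() below.
	 */
	static unsigned long vma_mmio_hva_to_pfn(struct vm_area_struct *vma,
						 unsigned long addr)
	{
		/* Caller must ensure vma->vm_start <= addr < vma->vm_end. */
		return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	}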

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 078a7f1..c89029d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -112,6 +112,8 @@ static int dbg = 1;
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
+#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define PT64_LEVEL_BITS 9
@@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
+	if (pte & PT_SHADOW_IO_MARK)
+		return false;
+
 	return is_shadow_present_pte(pte);
 }
 
@@ -1034,7 +1039,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int largepage, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 pfn_t pfn, bool speculative,
+			 int direct_mmio)
 {
 	u64 spte;
 	int was_rmapped = 0;
@@ -1114,6 +1120,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 
+	if (direct_mmio)
+		spte |= PT_SHADOW_IO_MARK;
+
 unshadowed:
 
 	if (pte_access & ACC_WRITE_MASK)
@@ -1129,16 +1138,19 @@ unshadowed:
 		++vcpu->kvm->stat.lpages;
 
 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
-	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn, largepage);
-		if (!is_rmap_pte(*shadow_pte))
-			kvm_release_pfn_clean(pfn);
-	} else {
-		if (was_writeble)
-			kvm_release_pfn_dirty(pfn);
-		else
-			kvm_release_pfn_clean(pfn);
+	if (!direct_mmio) {
+		if (!was_rmapped) {
+			rmap_add(vcpu, shadow_pte, gfn, largepage);
+			if (!is_rmap_pte(*shadow_pte))
+				kvm_release_pfn_clean(pfn);
+		} else {
+			if (was_writeble)
+				kvm_release_pfn_dirty(pfn);
+			else
+				kvm_release_pfn_clean(pfn);
+		}
 	}
+
 	if (!ptwrite || !*ptwrite)
 		vcpu->arch.last_pte_updated = shadow_pte;
 }
@@ -1149,7 +1161,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			   int largepage, gfn_t gfn, pfn_t pfn,
-			   int level)
+			   int level, int direct_mmio)
 {
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
@@ -1163,13 +1175,15 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 		if (level == 1) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, 0, gfn, pfn, false);
+				     0, write, 1, &pt_write, 0, gfn, pfn,
+				     false, direct_mmio);
 			return pt_write;
 		}
 
 		if (largepage && level == 2) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, 1, gfn, pfn, false);
+				     0, write, 1, &pt_write, 1, gfn, pfn,
+				     false, direct_mmio);
 			return pt_write;
 		}
 
@@ -1200,6 +1214,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	int r;
 	int largepage = 0;
 	pfn_t pfn;
+	int direct_mmio = 0;
 
 	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1207,10 +1222,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 		largepage = 1;
 	}
 
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio);
 	up_read(&current->mm->mmap_sem);
 
-	/* mmio */
+	/* handle emulated mmio */
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return 1;
@@ -1219,7 +1234,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
-			 PT32E_ROOT_LEVEL);
+			 PT32E_ROOT_LEVEL, direct_mmio);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -1355,6 +1370,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	int r;
 	int largepage = 0;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int direct_mmio = 0;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1368,7 +1384,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio);
 	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
@@ -1377,7 +1393,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, TDP_ROOT_LEVEL,
+			 direct_mmio);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1643,6 +1660,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int r;
 	u64 gpte = 0;
 	pfn_t pfn;
+	int direct_mmio = 0;
 
 	vcpu->arch.update_pte.largepage = 0;
 
@@ -1678,9 +1696,12 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio);
 	up_read(&current->mm->mmap_sem);
 
+	if (direct_mmio)
+		return;
+
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 156fe10..e85d8ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -264,9 +264,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	if (is_error_pfn(pfn))
 		return;
 	kvm_get_pfn(pfn);
+
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
-		     pfn, true);
+		     pfn, true, false);
 }
 
 /*
@@ -275,7 +276,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *walker,
 			 int user_fault, int write_fault, int largepage,
-			 int *ptwrite, pfn_t pfn)
+			 int *ptwrite, pfn_t pfn, int direct_mmio)
 {
 	hpa_t shadow_addr;
 	int level;
@@ -349,11 +350,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
 		     user_fault, write_fault,
 		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-		     ptwrite, largepage, walker->gfn, pfn, false);
+		     ptwrite, largepage, walker->gfn, pfn, false,
+		     direct_mmio);
 
 	return shadow_ent;
 }
 
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr);
+
+
 /*
  * Page fault handler.  There are several causes for a page fault:
  *   - there is no shadow pte for the guest pte
@@ -380,6 +385,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int r;
 	pfn_t pfn;
 	int largepage = 0;
+	int direct_mmio = 0;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,10 +419,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 			largepage = 1;
 		}
 	}
-	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &direct_mmio);
 	up_read(&current->mm->mmap_sem);
 
-	/* mmio */
+	/* handle emulated mmio */
 	if (is_error_pfn(pfn)) {
 		pgprintk("gfn %x is mmio\n", walker.gfn);
 		kvm_release_pfn_clean(pfn);
@@ -426,7 +432,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  largepage, &write_pt, pfn);
+				  largepage, &write_pt, pfn,
+				  direct_mmio);
 
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 shadow_pte, *shadow_pte, write_pt);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 578c363..0910cc1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -173,7 +173,7 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a52c08..07b95f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -526,20 +526,33 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 /*
  * Requires current->mm->mmap_sem to be held
  */
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio)
 {
 	struct page *page[1];
 	unsigned long addr;
 	int npages;
+	struct vm_area_struct *vma;
 
 	might_sleep();
 
+	if (direct_mmio)
+		*direct_mmio = 0;
+
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr)) {
 		get_page(bad_page);
 		return page_to_pfn(bad_page);
 	}
 
+	/* handle mmio */
+	vma = find_vma(current->mm, addr);
+	if (vma->vm_flags & VM_IO) {
+		if (direct_mmio)
+			*direct_mmio = 1;
+
+		return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+	}
+
 	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
 				NULL);
 
@@ -555,7 +568,7 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
-	return pfn_to_page(gfn_to_pfn(kvm, gfn));
+	return pfn_to_page(gfn_to_pfn(kvm, gfn, NULL));
 }
 
 EXPORT_SYMBOL_GPL(gfn_to_page);
-- 
1.5.4.5

