KVM: MMU: Switch to mmu spinlock

Convert the synchronization of the shadow handling to a separate mmu_lock
spinlock.

Also guard fetch() by mmap_sem in read-mode to protect against alias
and memslot changes.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c0b757b..834698d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -971,16 +971,12 @@
 {
 }
 
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
+			   gfn_t gfn, struct page *page)
 {
 	int level = PT32E_ROOT_LEVEL;
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
-	struct page *page;
-
-	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	for (; ; level--) {
 		u32 index = PT64_INDEX(v, level);
@@ -1022,9 +1018,17 @@
 {
 	int r;
 
-	mutex_lock(&vcpu->kvm->lock);
-	r = __nonpaging_map(vcpu, v, write, gfn);
-	mutex_unlock(&vcpu->kvm->lock);
+	struct page *page;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	up_read(&current->mm->mmap_sem);
+
 	return r;
 }
 
@@ -1045,7 +1049,7 @@
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1053,7 +1057,7 @@
 		sp = page_header(root);
 		--sp->root_count;
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
 #endif
@@ -1067,7 +1071,7 @@
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
@@ -1270,9 +1274,9 @@
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		goto out;
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	mmu_alloc_roots(vcpu);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
@@ -1408,7 +1412,7 @@
 
 	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
 	if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1477,7 +1481,7 @@
 		}
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	if (vcpu->arch.update_pte.page) {
 		kvm_release_page_clean(vcpu->arch.update_pte.page);
 		vcpu->arch.update_pte.page = NULL;
@@ -1493,15 +1497,15 @@
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 	up_read(&current->mm->mmap_sem);
 
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	return r;
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
 		struct kvm_mmu_page *sp;
 
@@ -1510,7 +1514,7 @@
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		++vcpu->kvm->stat.mmu_recycled;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -1642,10 +1646,10 @@
 {
 	struct kvm_mmu_page *sp, *node;
 
-	mutex_lock(&kvm->lock);
+	spin_lock(&kvm->mmu_lock);
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
 		kvm_mmu_zap_page(kvm, sp);
-	mutex_unlock(&kvm->lock);
+	spin_unlock(&kvm->mmu_lock);
 
 	kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3d7846b..a35b83a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -387,7 +387,6 @@
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 			     fetch_fault);
-	up_read(&current->mm->mmap_sem);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
@@ -396,12 +395,13 @@
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		up_read(&current->mm->mmap_sem);
 		return 0;
 	}
 
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
 
-	mutex_lock(&vcpu->kvm->lock);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  &write_pt, page);
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -414,13 +414,15 @@
 	 * mmio: emulate if accessible, otherwise its a guest fault.
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
-		mutex_unlock(&vcpu->kvm->lock);
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		up_read(&current->mm->mmap_sem);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
-	mutex_unlock(&vcpu->kvm->lock);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
 
 	return write_pt;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39493f..3d251f8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1477,7 +1477,6 @@
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	mutex_lock(&kvm->lock);
 	down_write(&current->mm->mmap_sem);
 	if (kvm->arch.apic_access_page)
 		goto out;
@@ -1491,7 +1490,6 @@
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
 	up_write(&current->mm->mmap_sem);
-	mutex_unlock(&kvm->lock);
 	return r;
 }