From ca45aaae1ef98890ac4e3ee48d65aa22401fd1dc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 Mar 2007 19:21:03 +0200 Subject: KVM: Unset kvm_arch_ops if arch module loading failed Otherwise, the core module thinks the arch module is loaded, and won't let you reload it after you've fixed the bug. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a163bca3897..dc7a8c78cbf 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2464,7 +2464,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) r = kvm_arch_ops->hardware_setup(); if (r < 0) - return r; + goto out; on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); r = register_cpu_notifier(&kvm_cpu_notifier); @@ -2500,6 +2500,8 @@ out_free_2: out_free_1: on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); +out: + kvm_arch_ops = NULL; return r; } -- cgit v1.2.3 From f5b42c3324494ea3f9bf795e2a7e4d3cbb06c607 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 6 Mar 2007 12:05:53 +0200 Subject: KVM: Fix guest sysenter on vmx The vmx code currently treats the guest's sysenter support msrs as 32-bit values, which breaks 32-bit compat mode userspace on 64-bit guests. Fix by using the native word width of the machine. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c07178e6112..bfa0ce42ea9 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -371,10 +371,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_read32(GUEST_SYSENTER_CS); break; case MSR_IA32_SYSENTER_EIP: - data = vmcs_read32(GUEST_SYSENTER_EIP); + data = vmcs_readl(GUEST_SYSENTER_EIP); break; case MSR_IA32_SYSENTER_ESP: - data = vmcs_read32(GUEST_SYSENTER_ESP); + data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: msr = find_msr_entry(vcpu, msr_index); @@ -412,10 +412,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_write32(GUEST_SYSENTER_CS, data); break; case MSR_IA32_SYSENTER_EIP: - vmcs_write32(GUEST_SYSENTER_EIP, data); + vmcs_writel(GUEST_SYSENTER_EIP, data); break; case MSR_IA32_SYSENTER_ESP: - vmcs_write32(GUEST_SYSENTER_ESP, data); + vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: guest_write_tsc(data); -- cgit v1.2.3 From ac1b714e78c8f0b252f8d8872e6ce6f898a123b3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 8 Mar 2007 17:13:32 +0200 Subject: KVM: MMU: Fix guest writes to nonpae pde KVM shadow page tables are always in pae mode, regardless of the guest setting. This means that a guest pde (mapping 4MB of memory) is mapped to two shadow pdes (mapping 2MB each). When the guest writes to a pte or pde, we intercept the write and emulate it. We also remove any shadowed mappings corresponding to the write. Since the mmu did not account for the doubling in the number of pdes, it removed the wrong entry, resulting in a mismatch between shadow page tables and guest page tables, followed shortly by guest memory corruption. This patch fixes the problem by detecting the special case of writing to a non-pae pde and adjusting the address and number of shadow pdes zapped accordingly. Acked-by: Ingo Molnar Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a1a93368f31..2cb48937be4 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1093,22 +1093,40 @@ out: return r; } +static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *spte) +{ + u64 pte; + struct kvm_mmu_page *child; + + pte = *spte; + if (is_present_pte(pte)) { + if (page->role.level == PT_PAGE_TABLE_LEVEL) + rmap_remove(vcpu, spte); + else { + child = page_header(pte & PT64_BASE_ADDR_MASK); + mmu_page_remove_parent_pte(vcpu, child, spte); + } + } + *spte = 0; +} + void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *page; - struct kvm_mmu_page *child; struct hlist_node *node, *n; struct hlist_head *bucket; unsigned index; u64 *spte; - u64 pte; unsigned offset = offset_in_page(gpa); unsigned pte_size; unsigned page_offset; unsigned misaligned; int level; int flooded = 0; + int npte; pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); if (gfn == vcpu->last_pt_write_gfn) { @@ -1144,22 +1162,26 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) } page_offset = offset; level = page->role.level; + npte = 1; if (page->role.glevels == PT32_ROOT_LEVEL) { - page_offset <<= 1; /* 32->64 */ + page_offset <<= 1; /* 32->64 */ + /* + * A 32-bit pde maps 4MB while the shadow pdes map + * only 2MB. So we need to double the offset again + * and zap two pdes instead of one. + */ + if (level == PT32_ROOT_LEVEL) { + page_offset <<= 1; + npte = 2; + } page_offset &= ~PAGE_MASK; } spte = __va(page->page_hpa); spte += page_offset / sizeof(*spte); - pte = *spte; - if (is_present_pte(pte)) { - if (level == PT_PAGE_TABLE_LEVEL) - rmap_remove(vcpu, spte); - else { - child = page_header(pte & PT64_BASE_ADDR_MASK); - mmu_page_remove_parent_pte(vcpu, child, spte); - } + while (npte--) { + mmu_pre_write_zap_pte(vcpu, page, spte); + ++spte; } - *spte = 0; } } -- cgit v1.2.3 From 27aba76615eeb36af84118e8ea6d35ffa51fd1e3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 9 Mar 2007 13:04:31 +0200 Subject: KVM: MMU: Fix host memory corruption on i386 with >= 4GB ram PAGE_MASK is an unsigned long, so using it to mask physical addresses on i386 (which are 64-bit wide) leads to truncation. This can result in page->private of unrelated memory pages being modified, with disasterous results. Fix by not using PAGE_MASK for physical addresses; instead calculate the correct value directly from PAGE_SIZE. Also fix a similar BUG_ON(). Acked-by: Ingo Molnar Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2cb48937be4..e85b4c7c36f 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -131,7 +131,7 @@ static int dbg = 1; (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) -#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK) +#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) #define PT64_DIR_BASE_ADDR_MASK \ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) @@ -406,8 +406,8 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) spte = desc->shadow_ptes[0]; } BUG_ON(!spte); - BUG_ON((*spte & PT64_BASE_ADDR_MASK) != - page_to_pfn(page) << PAGE_SHIFT); + BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT + != page_to_pfn(page)); BUG_ON(!(*spte & PT_PRESENT_MASK)); BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); -- cgit v1.2.3