From e925c5ba9380dad5fdf1d0a9d9199ac43be74c6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 14:47:02 +0300 Subject: KVM: Assume that writes smaller than 4 bytes are to non-pagetable pages This allows us to remove write protection earlier than otherwise. Should some mad OS choose to use byte writes to update pagetables, it will suffer a performance hit, but still work correctly. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index e8e228118de..2277b7cd118 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1169,6 +1169,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) continue; pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); + misaligned |= bytes < 4; if (misaligned || flooded) { /* * Misaligned accesses are too much trouble to fix -- cgit v1.2.3 From 09072daf37abbfe8e2d5018dd913f229c76190f7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 14:16:52 +0300 Subject: KVM: Unify kvm_mmu_pre_write() and kvm_mmu_post_write() Instead of calling two functions and repeating expensive checks, call one function and provide it with before/after information. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2277b7cd118..b3a83ef2cf0 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1118,7 +1118,7 @@ out: return r; } -static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, +static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, u64 *spte) { @@ -1137,7 +1137,8 @@ static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, *spte = 0; } -void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) +void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *old, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *page; @@ -1206,16 +1207,12 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) spte = __va(page->page_hpa); spte += page_offset / sizeof(*spte); while (npte--) { - mmu_pre_write_zap_pte(vcpu, page, spte); + mmu_pte_write_zap_pte(vcpu, page, spte); ++spte; } } } -void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) -{ -} - int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) { gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); -- cgit v1.2.3 From fce0657ff9f14f6b1f147b5fcd6db2f54c06424e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 16:44:05 +0300 Subject: KVM: MMU: Respect nonpae pagetable quadrant when zapping ptes When a guest writes to a page that has an mmu shadow, we have to clear the shadow pte corresponding to the memory location touched by the guest. Now, in nonpae mode, a single guest page may have two or four shadow pages (because a nonpae page maps 4MB or 4GB, whereas the pae shadow maps 2MB or 1GB), so we when we look up the page we find up to three additional aliases for the page. Since we _clear_ the shadow pte, it doesn't matter except for a slight performance penalty, but if we want to _update_ the shadow pte instead of clearing it, it is vital that we don't modify the aliases. Fortunately, exactly which page is needed (the "quadrant") is easily computed, and is accessible in the shadow page header. All we need is to ignore shadow pages from the wrong quadrants. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index b3a83ef2cf0..23dc4612026 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1150,6 +1150,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned pte_size; unsigned page_offset; unsigned misaligned; + unsigned quadrant; int level; int flooded = 0; int npte; @@ -1202,7 +1203,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, page_offset <<= 1; npte = 2; } + quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; + if (quadrant != page->role.quadrant) + continue; } spte = __va(page->page_hpa); spte += page_offset / sizeof(*spte); -- cgit v1.2.3 From 0028425f647b6b78a0de8810d6b782fc3ce6c272 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 16:53:31 +0300 Subject: KVM: Update shadow pte on write to guest pte A typical demand page/copy on write pattern is: - page fault on vaddr - kvm propagates fault to guest - guest handles fault, updates pte - kvm traps write, clears shadow pte, resumes guest - guest returns to userspace, re-faults on same vaddr - kvm installs shadow pte, resumes guest - guest continues So, three vmexits for a single guest page fault. But if instead of clearing the page table entry, we update to correspond to the value that the guest has just written, we eliminate the third vmexit. This patch does exactly that, reducing kbuild time by about 10%. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 23dc4612026..9ec3df90dbb 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1137,6 +1137,20 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, *spte = 0; } +static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *spte, + const void *new, int bytes) +{ + if (page->role.level != PT_PAGE_TABLE_LEVEL) + return; + + if (page->role.glevels == PT32_ROOT_LEVEL) + paging32_update_pte(vcpu, page, spte, new, bytes); + else + paging64_update_pte(vcpu, page, spte, new, bytes); +} + void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *old, const u8 *new, int bytes) { @@ -1212,6 +1226,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, spte += page_offset / sizeof(*spte); while (npte--) { mmu_pte_write_zap_pte(vcpu, page, spte); + mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); ++spte; } } -- cgit v1.2.3 From 4b02d6daa12465b209ec4f50c363f9553a51f45b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 May 2007 15:36:30 +0300 Subject: KVM: MMU: Simplify kvm_mmu_free_page() a tiny bit Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 9ec3df90dbb..a96c9ae54f3 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -455,12 +455,10 @@ static int is_empty_shadow_page(hpa_t page_hpa) } #endif -static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) +static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page_head) { - struct kvm_mmu_page *page_head = page_header(page_hpa); - - ASSERT(is_empty_shadow_page(page_hpa)); - page_head->page_hpa = page_hpa; + ASSERT(is_empty_shadow_page(page_head->page_hpa)); list_move(&page_head->link, &vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -690,7 +688,7 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, kvm_mmu_page_unlink_children(vcpu, page); if (!page->root_count) { hlist_del(&page->hash_link); - kvm_mmu_free_page(vcpu, page->page_hpa); + kvm_mmu_free_page(vcpu, page); } else list_move(&page->link, &vcpu->kvm->active_mmu_pages); } -- cgit v1.2.3 From 47ad8e689b4f94f9fc3b2588a7aaa65e4eca667c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 May 2007 15:50:58 +0300 Subject: KVM: MMU: Store shadow page tables as kernel virtual addresses, not physical Simpifies things a bit. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a96c9ae54f3..c85c6649280 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -439,13 +439,12 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) } #ifdef MMU_DEBUG -static int is_empty_shadow_page(hpa_t page_hpa) +static int is_empty_shadow_page(u64 *spt) { u64 *pos; u64 *end; - for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64); - pos != end; pos++) + for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) if (*pos != 0) { printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, pos, *pos); @@ -458,7 +457,7 @@ static int is_empty_shadow_page(hpa_t page_hpa) static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page_head) { - ASSERT(is_empty_shadow_page(page_head->page_hpa)); + ASSERT(is_empty_shadow_page(page_head->spt)); list_move(&page_head->link, &vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -478,7 +477,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_move(&page->link, &vcpu->kvm->active_mmu_pages); - ASSERT(is_empty_shadow_page(page->page_hpa)); + ASSERT(is_empty_shadow_page(page->spt)); page->slot_bitmap = 0; page->multimapped = 0; page->parent_pte = parent_pte; @@ -636,7 +635,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, u64 *pt; u64 ent; - pt = __va(page->page_hpa); + pt = page->spt; if (page->role.level == PT_PAGE_TABLE_LEVEL) { for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { @@ -803,7 +802,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) return -ENOMEM; } - table[index] = new_table->page_hpa | PT_PRESENT_MASK + table[index] = __pa(new_table->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; } table_addr = table[index] & PT64_BASE_ADDR_MASK; @@ -855,7 +854,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); page = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, 0, NULL); - root = page->page_hpa; + root = __pa(page->spt); ++page->root_count; vcpu->mmu.root_hpa = root; return; @@ -876,7 +875,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, !is_paging(vcpu), 0, NULL); - root = page->page_hpa; + root = __pa(page->spt); ++page->root_count; vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; } @@ -1220,8 +1219,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (quadrant != page->role.quadrant) continue; } - spte = __va(page->page_hpa); - spte += page_offset / sizeof(*spte); + spte = &page->spt[page_offset / sizeof(*spte)]; while (npte--) { mmu_pte_write_zap_pte(vcpu, page, spte); mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); @@ -1262,8 +1260,8 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_del(&page->link); - __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); - page->page_hpa = INVALID_PAGE; + free_page((unsigned long)page->spt); + page->spt = NULL; } free_page((unsigned long)vcpu->mmu.pae_root); } @@ -1282,8 +1280,8 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) if ((page = alloc_page(GFP_KERNEL)) == NULL) goto error_1; set_page_private(page, (unsigned long)page_header); - page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; - memset(__va(page_header->page_hpa), 0, PAGE_SIZE); + page_header->spt = page_address(page); + memset(page_header->spt, 0, PAGE_SIZE); list_add(&page_header->link, &vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -1346,7 +1344,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) if (!test_bit(slot, &page->slot_bitmap)) continue; - pt = __va(page->page_hpa); + pt = page->spt; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) /* avoid RMW */ if (pt[i] & PT_WRITABLE_MASK) { @@ -1497,7 +1495,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu) int i; list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { - u64 *pt = __va(page->page_hpa); + u64 *pt = page->spt; if (page->role.level != PT_PAGE_TABLE_LEVEL) continue; -- cgit v1.2.3 From d3d25b048b9c7e5c1c20918157a71df734f71766 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 May 2007 12:34:53 +0300 Subject: KVM: MMU: Use slab caches for shadow pages and their headers Use slab caches instead of a simple custom list. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 64 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index c85c6649280..46491b4cd85 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -165,6 +165,8 @@ struct kvm_rmap_desc { static struct kmem_cache *pte_chain_cache; static struct kmem_cache *rmap_desc_cache; +static struct kmem_cache *mmu_page_cache; +static struct kmem_cache *mmu_page_header_cache; static int is_write_protection(struct kvm_vcpu *vcpu) { @@ -235,6 +237,14 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) goto out; r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, rmap_desc_cache, 1, gfp_flags); + if (r) + goto out; + r = mmu_topup_memory_cache(&vcpu->mmu_page_cache, + mmu_page_cache, 4, gfp_flags); + if (r) + goto out; + r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, + mmu_page_header_cache, 4, gfp_flags); out: return r; } @@ -258,6 +268,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); + mmu_free_memory_cache(&vcpu->mmu_page_cache); + mmu_free_memory_cache(&vcpu->mmu_page_header_cache); } static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, @@ -458,7 +470,9 @@ static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page_head) { ASSERT(is_empty_shadow_page(page_head->spt)); - list_move(&page_head->link, &vcpu->free_pages); + list_del(&page_head->link); + mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt); + mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head); ++vcpu->kvm->n_free_mmu_pages; } @@ -472,11 +486,14 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, { struct kvm_mmu_page *page; - if (list_empty(&vcpu->free_pages)) + if (!vcpu->kvm->n_free_mmu_pages) return NULL; - page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); - list_move(&page->link, &vcpu->kvm->active_mmu_pages); + page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, + sizeof *page); + page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); + set_page_private(virt_to_page(page->spt), (unsigned long)page); + list_add(&page->link, &vcpu->kvm->active_mmu_pages); ASSERT(is_empty_shadow_page(page->spt)); page->slot_bitmap = 0; page->multimapped = 0; @@ -1083,6 +1100,7 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); + mmu_topup_memory_caches(vcpu); if (!is_paging(vcpu)) return nonpaging_init_context(vcpu); else if (is_long_mode(vcpu)) @@ -1256,13 +1274,6 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) struct kvm_mmu_page, link); kvm_mmu_zap_page(vcpu, page); } - while (!list_empty(&vcpu->free_pages)) { - page = list_entry(vcpu->free_pages.next, - struct kvm_mmu_page, link); - list_del(&page->link); - free_page((unsigned long)page->spt); - page->spt = NULL; - } free_page((unsigned long)vcpu->mmu.pae_root); } @@ -1273,18 +1284,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) ASSERT(vcpu); - for (i = 0; i < KVM_NUM_MMU_PAGES; i++) { - struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i]; - - INIT_LIST_HEAD(&page_header->link); - if ((page = alloc_page(GFP_KERNEL)) == NULL) - goto error_1; - set_page_private(page, (unsigned long)page_header); - page_header->spt = page_address(page); - memset(page_header->spt, 0, PAGE_SIZE); - list_add(&page_header->link, &vcpu->free_pages); - ++vcpu->kvm->n_free_mmu_pages; - } + vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES; /* * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. @@ -1309,7 +1309,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) { ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); - ASSERT(list_empty(&vcpu->free_pages)); return alloc_mmu_pages(vcpu); } @@ -1318,7 +1317,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) { ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); - ASSERT(!list_empty(&vcpu->free_pages)); return init_kvm_mmu(vcpu); } @@ -1377,6 +1375,10 @@ void kvm_mmu_module_exit(void) kmem_cache_destroy(pte_chain_cache); if (rmap_desc_cache) kmem_cache_destroy(rmap_desc_cache); + if (mmu_page_cache) + kmem_cache_destroy(mmu_page_cache); + if (mmu_page_header_cache) + kmem_cache_destroy(mmu_page_header_cache); } int kvm_mmu_module_init(void) @@ -1392,6 +1394,18 @@ int kvm_mmu_module_init(void) if (!rmap_desc_cache) goto nomem; + mmu_page_cache = kmem_cache_create("kvm_mmu_page", + PAGE_SIZE, + PAGE_SIZE, 0, NULL, NULL); + if (!mmu_page_cache) + goto nomem; + + mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", + sizeof(struct kvm_mmu_page), + 0, 0, NULL, NULL); + if (!mmu_page_header_cache) + goto nomem; + return 0; nomem: -- cgit v1.2.3 From e60d75ea292071e7ab33c10ca73fdd33fcbbe501 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 May 2007 19:31:17 +0300 Subject: KVM: MMU: Move set_pte_common() to pte width dependent code In preparation of some modifications. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 46491b4cd85..a7631502f22 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -965,54 +965,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } -static inline void set_pte_common(struct kvm_vcpu *vcpu, - u64 *shadow_pte, - gpa_t gaddr, - int dirty, - u64 access_bits, - gfn_t gfn) -{ - hpa_t paddr; - - *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; - if (!dirty) - access_bits &= ~PT_WRITABLE_MASK; - - paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); - - *shadow_pte |= access_bits; - - if (is_error_hpa(paddr)) { - *shadow_pte |= gaddr; - *shadow_pte |= PT_SHADOW_IO_MARK; - *shadow_pte &= ~PT_PRESENT_MASK; - return; - } - - *shadow_pte |= paddr; - - if (access_bits & PT_WRITABLE_MASK) { - struct kvm_mmu_page *shadow; - - shadow = kvm_mmu_lookup_page(vcpu, gfn); - if (shadow) { - pgprintk("%s: found shadow page for %lx, marking ro\n", - __FUNCTION__, gfn); - access_bits &= ~PT_WRITABLE_MASK; - if (is_writeble_pte(*shadow_pte)) { - *shadow_pte &= ~PT_WRITABLE_MASK; - kvm_arch_ops->tlb_flush(vcpu); - } - } - } - - if (access_bits & PT_WRITABLE_MASK) - mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); - - page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); - rmap_add(vcpu, shadow_pte); -} - static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) -- cgit v1.2.3 From 63b1ad24d2695db3ec1cc8b10760e130e1a1f04b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 11:56:54 +0300 Subject: KVM: MMU: Fold fix_read_pf() into set_pte_common() Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a7631502f22..2079d69f186 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -972,23 +972,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); } -static inline int fix_read_pf(u64 *shadow_ent) -{ - if ((*shadow_ent & PT_SHADOW_USER_MASK) && - !(*shadow_ent & PT_USER_MASK)) { - /* - * If supervisor write protect is disabled, we shadow kernel - * pages as user pages so we can trap the write access. - */ - *shadow_ent |= PT_USER_MASK; - *shadow_ent &= ~PT_WRITABLE_MASK; - - return 1; - - } - return 0; -} - static void paging_free(struct kvm_vcpu *vcpu) { nonpaging_free(vcpu); -- cgit v1.2.3 From 97a0a01ea9229e4f3f0f06e0584227e9687159a5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:08:29 +0300 Subject: KVM: MMU: Fold fix_write_pf() into set_pte_common() This prevents some work from being performed twice, and, more importantly, reduces the number of places where we modify shadow ptes. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2079d69f186..3cdbf687df2 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -731,6 +731,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) return r; } +static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + struct kvm_mmu_page *page; + + while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { + pgprintk("%s: zap %lx %x\n", + __FUNCTION__, gfn, page->role.word); + kvm_mmu_zap_page(vcpu, page); + } +} + static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) { int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); -- cgit v1.2.3 From e663ee64aefc57f7eff7325142206c4ea0200be8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:46:04 +0300 Subject: KVM: MMU: Make setting shadow ptes atomic on i386 Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 3cdbf687df2..f24b540148a 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "vmx.h" #include "kvm.h" @@ -204,6 +205,15 @@ static int is_rmap_pte(u64 pte) == (PT_WRITABLE_MASK | PT_PRESENT_MASK); } +static void set_shadow_pte(u64 *sptep, u64 spte) +{ +#ifdef CONFIG_X86_64 + set_64bit((unsigned long *)sptep, spte); +#else + set_64bit((unsigned long long *)sptep, spte); +#endif +} + static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, struct kmem_cache *base_cache, int min, gfp_t gfp_flags) @@ -446,7 +456,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); rmap_remove(vcpu, spte); kvm_arch_ops->tlb_flush(vcpu); - *spte &= ~(u64)PT_WRITABLE_MASK; + set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); } } @@ -699,7 +709,7 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, } BUG_ON(!parent_pte); kvm_mmu_put_page(vcpu, page, parent_pte); - *parent_pte = 0; + set_shadow_pte(parent_pte, 0); } kvm_mmu_page_unlink_children(vcpu, page); if (!page->root_count) { -- cgit v1.2.3 From fd97dc516c372982f9c3637e20b131e1f55ac2f6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:20:14 +0300 Subject: KVM: MMU: Simpify accessed/dirty/present/nx bit handling Always set the accessed and dirty bit (since having them cleared causes a read-modify-write cycle), always set the present bit, and copy the nx bit from the guest. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index f24b540148a..b47391ffe54 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -91,11 +91,6 @@ static int dbg = 1; #define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) -#define PT32_PTE_COPY_MASK \ - (PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK) - -#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK) - #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 -- cgit v1.2.3 From b64b3763a5b3868e85330c891e1a30189dcde9b1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:24:09 +0300 Subject: KVM: MMU: Don't cache guest access bits in the shadow page table This was once used to avoid accessing the guest pte when upgrading the shadow pte from read-only to read-write. But usually we need to set the guest pte dirty or accessed bits anyway, so this wasn't really exploited. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index b47391ffe54..986d01294f3 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -97,14 +97,6 @@ static int dbg = 1; #define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) -#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1) -#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT) - -#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1) -#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT)) - -#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT) - #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 -- cgit v1.2.3 From bd2b2baa5c5fbb08b4b0df7508ff419407f7ece6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:28:51 +0300 Subject: KVM: MMU: Remove unused large page marker This has not been used for some time, as the same information is available in the page header. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 986d01294f3..283df031b03 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -94,7 +94,6 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 -#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define VALID_PAGE(x) ((x) != INVALID_PAGE) -- cgit v1.2.3 From 17c3ba9d37dbda490792a2b52953f09d0dee30d6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Jun 2007 15:58:30 +0300 Subject: KVM: Lazy guest cr3 switching Switch guest paging context may require us to allocate memory, which might fail. Instead of wiring up error paths everywhere, make context switching lazy and actually do the switch before the next guest entry, where we can return an error if allocation fails. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 283df031b03..5915d7a1c4f 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -949,9 +949,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) context->free = nonpaging_free; context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa); + context->root_hpa = INVALID_PAGE; return 0; } @@ -965,11 +963,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) { pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); mmu_free_roots(vcpu); - if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) - kvm_mmu_free_some_pages(vcpu); - mmu_alloc_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); - kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } static void inject_page_fault(struct kvm_vcpu *vcpu, @@ -1003,10 +996,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) context->free = paging_free; context->root_level = level; context->shadow_root_level = level; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa | - (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + context->root_hpa = INVALID_PAGE; return 0; } @@ -1025,10 +1015,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) context->free = paging_free; context->root_level = PT32_ROOT_LEVEL; context->shadow_root_level = PT32E_ROOT_LEVEL; - mmu_alloc_roots(vcpu); - ASSERT(VALID_PAGE(context->root_hpa)); - kvm_arch_ops->set_cr3(vcpu, context->root_hpa | - (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); + context->root_hpa = INVALID_PAGE; return 0; } @@ -1042,7 +1029,6 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); - mmu_topup_memory_caches(vcpu); if (!is_paging(vcpu)) return nonpaging_init_context(vcpu); else if (is_long_mode(vcpu)) @@ -1063,17 +1049,32 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) } int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) +{ + destroy_kvm_mmu(vcpu); + return init_kvm_mmu(vcpu); +} + +int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; - destroy_kvm_mmu(vcpu); - r = init_kvm_mmu(vcpu); - if (r < 0) - goto out; + spin_lock(&vcpu->kvm->lock); r = mmu_topup_memory_caches(vcpu); + if (r) + goto out; + mmu_alloc_roots(vcpu); + kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); + kvm_mmu_flush_tlb(vcpu); out: + spin_unlock(&vcpu->kvm->lock); return r; } +EXPORT_SYMBOL_GPL(kvm_mmu_load); + +void kvm_mmu_unload(struct kvm_vcpu *vcpu) +{ + mmu_free_roots(vcpu); +} static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, -- cgit v1.2.3 From 7b53aa56508479507c6e5667bb252ca7c2cd19cf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 12:17:03 +0300 Subject: KVM: Fix vcpu freeing for guest smp A vcpu can pin up to four mmu shadow pages, which means the freeing loop will never terminate. Fix by first unpinning shadow pages on all vcpus, then freeing shadow pages. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 5915d7a1c4f..d4de988d182 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -838,11 +838,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) int i; struct kvm_mmu_page *page; + if (!VALID_PAGE(vcpu->mmu.root_hpa)) + return; #ifdef CONFIG_X86_64 if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->mmu.root_hpa; - ASSERT(VALID_PAGE(root)); page = page_header(root); --page->root_count; vcpu->mmu.root_hpa = INVALID_PAGE; @@ -853,7 +854,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->mmu.pae_root[i]; if (root) { - ASSERT(VALID_PAGE(root)); root &= PT64_BASE_ADDR_MASK; page = page_header(root); --page->root_count; -- cgit v1.2.3 From d9e368d61263055eceac2966bb7ea31b89da3425 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Jun 2007 19:18:30 +0300 Subject: KVM: Flush remote tlbs when reducing shadow pte permissions When a vcpu causes a shadow tlb entry to have reduced permissions, it must also clear the tlb on remote vcpus. We do that by: - setting a bit on the vcpu that requests a tlb flush before the next entry - if the vcpu is currently executing, we send an ipi to make sure it exits before we continue Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index d4de988d182..ad50cfda5ac 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -441,7 +441,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); rmap_remove(vcpu, spte); - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); } } @@ -656,7 +656,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, rmap_remove(vcpu, &pt[i]); pt[i] = 0; } - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); return; } @@ -669,6 +669,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, ent &= PT64_BASE_ADDR_MASK; mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); } + kvm_flush_remote_tlbs(vcpu->kvm); } static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, @@ -1093,6 +1094,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, } } *spte = 0; + kvm_flush_remote_tlbs(vcpu->kvm); } static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, @@ -1308,7 +1310,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) } mmu_free_memory_caches(vcpu); - kvm_arch_ops->tlb_flush(vcpu); + kvm_flush_remote_tlbs(vcpu->kvm); init_kvm_mmu(vcpu); } -- cgit v1.2.3 From 88a97f0b2fe1cd08d06390dc2669b709ea96e11a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Jun 2007 17:13:26 +0800 Subject: KVM: MMU: Fix Wrong tlb flush order Need to flush the tlb after updating a pte, not before. Signed-off-by: Shaohua Li Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index ad50cfda5ac..49ffbd3da74 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -441,8 +441,8 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) BUG_ON(!(*spte & PT_WRITABLE_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); rmap_remove(vcpu, spte); - kvm_flush_remote_tlbs(vcpu->kvm); set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); + kvm_flush_remote_tlbs(vcpu->kvm); } } -- cgit v1.2.3 From e495606dd09d79f9fa496334ac3958f6ff179d82 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 14:15:57 -0400 Subject: KVM: Clean up #includes Remove unnecessary ones, and rearange the remaining in the standard order. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/mmu.c') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 49ffbd3da74..b297a6b111a 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -16,16 +16,18 @@ * the COPYING file in the top-level directory. * */ + +#include "vmx.h" +#include "kvm.h" + #include #include -#include #include #include #include -#include -#include "vmx.h" -#include "kvm.h" +#include +#include #undef MMU_DEBUG -- cgit v1.2.3