From bc6678a33d9b952981a8e44a4f876c3ad64ca4d8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:21 -0200 Subject: KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Use two steps for memslot deletion: mark the slot invalid (which stops instantiation of new shadow pages for that slot, but allows destruction), then instantiate the new empty slot. Also simplifies kvm_handle_hva locking. Signed-off-by: Marcelo Tosatti --- virt/kvm/assigned-dev.c | 8 +-- virt/kvm/iommu.c | 4 +- virt/kvm/kvm_main.c | 141 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 111 insertions(+), 42 deletions(-) (limited to 'virt') diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f73de631e3e..f51e684dd23 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -504,12 +504,12 @@ out: static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { - int r = 0; + int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; mutex_lock(&kvm->lock); - down_read(&kvm->slots_lock); + idx = srcu_read_lock(&kvm->srcu); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_dev->assigned_dev_id); @@ -573,7 +573,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, } out: - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; out_list_del: @@ -585,7 +585,7 @@ out_put: pci_dev_put(dev); out_free: kfree(match); - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; } diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index cf567d8033d..65a51432c8e 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) int i, r = 0; struct kvm_memslots *slots; - slots = kvm->memslots; + slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) int i; struct kvm_memslots *slots; - slots = kvm->memslots; + slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; i++) { kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 87d296d8b27..2bb24a814fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, unsigned long address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush; + int need_tlb_flush, idx; /* * When ->invalidate_page runs, the linux pte has been zapped @@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, * pte after kvm_unmap_hva returned, without noticing the page * is going to be freed. */ + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; need_tlb_flush = kvm_unmap_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) @@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, pte_t pte) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; kvm_set_spte_hva(kvm, address, pte); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, @@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush = 0; + int need_tlb_flush = 0, idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); /* * The count increase must become visible at unlock time as no @@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, for (; start < end; start += PAGE_SIZE) need_tlb_flush |= kvm_unmap_hva(kvm, start); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) @@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, unsigned long address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int young; + int young, idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); young = kvm_age_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); if (young) kvm_flush_remote_tlbs(kvm); @@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err; + if (init_srcu_struct(&kvm->srcu)) + goto out_err; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) + if (!page) { + cleanup_srcu_struct(&kvm->srcu); goto out_err; + } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void) r = kvm_init_mmu_notifier(kvm); if (r) { + cleanup_srcu_struct(&kvm->srcu); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif @@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #else kvm_arch_flush_shadow(kvm); #endif + cleanup_srcu_struct(&kvm->srcu); kvm_arch_destroy_vm(kvm); hardware_disable_all(); mmdrop(mm); @@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - int r; + int r, flush_shadow = 0; gfn_t base_gfn; unsigned long npages; unsigned long i; struct kvm_memory_slot *memslot; struct kvm_memory_slot old, new; + struct kvm_memslots *slots, *old_memslots; r = -EINVAL; /* General sanity checks */ @@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.rmap, 0, npages * sizeof(*new.rmap)); new.user_alloc = user_alloc; - /* - * hva_to_rmmap() serialzies with the mmu_lock and to be - * safe it has to ignore memslots with !user_alloc && - * !userspace_addr. - */ - if (user_alloc) - new.userspace_addr = mem->userspace_addr; - else - new.userspace_addr = 0; + new.userspace_addr = mem->userspace_addr; } if (!npages) goto skip_lpage; @@ -651,8 +660,9 @@ skip_lpage: if (!new.dirty_bitmap) goto out_free; memset(new.dirty_bitmap, 0, dirty_bytes); + /* destroy any largepage mappings for dirty tracking */ if (old.npages) - kvm_arch_flush_shadow(kvm); + flush_shadow = 1; } #else /* not defined CONFIG_S390 */ new.user_alloc = user_alloc; @@ -660,34 +670,72 @@ skip_lpage: new.userspace_addr = mem->userspace_addr; #endif /* not defined CONFIG_S390 */ - if (!npages) + if (!npages) { + r = -ENOMEM; + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + if (!slots) + goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + if (mem->slot >= slots->nmemslots) + slots->nmemslots = mem->slot + 1; + slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; + + old_memslots = kvm->memslots; + rcu_assign_pointer(kvm->memslots, slots); + synchronize_srcu_expedited(&kvm->srcu); + /* From this point no new shadow pages pointing to a deleted + * memslot will be created. + * + * validation of sp->gfn happens in: + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_roots) + */ kvm_arch_flush_shadow(kvm); + kfree(old_memslots); + } r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); if (r) goto out_free; - spin_lock(&kvm->mmu_lock); - if (mem->slot >= kvm->memslots->nmemslots) - kvm->memslots->nmemslots = mem->slot + 1; +#ifdef CONFIG_DMAR + /* map the pages in iommu page table */ + if (npages) { + r = kvm_iommu_map_pages(kvm, &new); + if (r) + goto out_free; + } +#endif - *memslot = new; - spin_unlock(&kvm->mmu_lock); + r = -ENOMEM; + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + if (!slots) + goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + if (mem->slot >= slots->nmemslots) + slots->nmemslots = mem->slot + 1; + + /* actual memory is freed via old in kvm_free_physmem_slot below */ + if (!npages) { + new.rmap = NULL; + new.dirty_bitmap = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) + new.lpage_info[i] = NULL; + } + + slots->memslots[mem->slot] = new; + old_memslots = kvm->memslots; + rcu_assign_pointer(kvm->memslots, slots); + synchronize_srcu_expedited(&kvm->srcu); kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); - kvm_free_physmem_slot(&old, npages ? &new : NULL); - /* Slot deletion case: we have to update the current slot */ - spin_lock(&kvm->mmu_lock); - if (!npages) - *memslot = old; - spin_unlock(&kvm->mmu_lock); -#ifdef CONFIG_DMAR - /* map the pages in iommu page table */ - r = kvm_iommu_map_pages(kvm, memslot); - if (r) - goto out; -#endif + kvm_free_physmem_slot(&old, &new); + kfree(old_memslots); + + if (flush_shadow) + kvm_arch_flush_shadow(kvm); + return 0; out_free: @@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); gfn = unalias_gfn(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; + if (memslot->flags & KVM_MEMSLOT_INVALID) + continue; + if (gfn >= memslot->base_gfn && gfn < memslot->base_gfn + memslot->npages) return 1; @@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); +int memslot_id(struct kvm *kvm, gfn_t gfn) +{ + int i; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memory_slot *memslot = NULL; + + gfn = unalias_gfn(kvm, gfn); + for (i = 0; i < slots->nmemslots; ++i) { + memslot = &slots->memslots[i]; + + if (gfn >= memslot->base_gfn + && gfn < memslot->base_gfn + memslot->npages) + break; + } + + return memslot - slots->memslots; +} + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; gfn = unalias_gfn(kvm, gfn); slot = gfn_to_memslot_unaliased(kvm, gfn); - if (!slot) + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); } -- cgit v1.2.3