From 85db06e514422ae429b5f85742d8111b70bd56f3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 10 Dec 2008 21:23:26 +0100 Subject: KVM: mmu_notifiers release method The destructor for huge pages uses the backing inode for adjusting hugetlbfs accounting. Hugepage mappings are destroyed by exit_mmap, after mmu_notifier_release, so there are no notifications through unmap_hugepage_range at this point. The hugetlbfs inode can be freed with pages backed by it referenced by the shadow. When the shadow releases its reference, the huge page destructor will access a now freed inode. Implement the release operation for kvm mmu notifiers to release page refs before the hugetlbfs inode is gone. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a5a08298aa..0b6f2f71271 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -789,11 +789,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, return young; } +static void kvm_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + kvm_arch_flush_shadow(kvm); +} + static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { .invalidate_page = kvm_mmu_notifier_invalidate_page, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, + .release = kvm_mmu_notifier_release, }; #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ -- cgit v1.2.3 From ad8ba2cd44d4d39fb3fe55d5dcc565b19fc3a7fb Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:02 +0800 Subject: KVM: Add kvm_arch_sync_events to sync with asynchronize events kvm_arch_sync_events is introduced to quiet down all other events may happen contemporary with VM destroy process, like IRQ handler and work struct for assigned device. For kvm_arch_sync_events is called at the very beginning of kvm_destroy_vm(), so the state of KVM here is legal and can provide a environment to quiet down other events. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b6f2f71271..68e3f1ec167 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -891,6 +891,7 @@ static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); -- cgit v1.2.3 From ba4cef31d5a397b64ba6d3ff713ce06c62f0c597 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:03 +0800 Subject: KVM: Fix racy in kvm_free_assigned_irq In the past, kvm_get_kvm() and kvm_put_kvm() was called in assigned device irq handler and interrupt_work, in order to prevent cancel_work_sync() in kvm_free_assigned_irq got a illegal state when waiting for interrupt_work done. But it's tricky and still got two problems: 1. A bug ignored two conditions that cancel_work_sync() would return true result in a additional kvm_put_kvm(). 2. If interrupt type is MSI, we would got a window between cancel_work_sync() and free_irq(), which interrupt would be injected again... This patch discard the reference count used for irq handler and interrupt_work, and ensure the legal state by moving the free function at the very beginning of kvm_destroy_vm(). And the patch fix the second bug by disable irq before cancel_work_sync(), which may result in nested disable of irq but OK for we are going to free it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68e3f1ec167..277ea7f39fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) assigned_dev->host_irq_disabled = false; } mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) @@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); @@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) } } +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ static void kvm_free_assigned_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { @@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm, if (!assigned_dev->irq_requested_type) return; - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); free_irq(assigned_dev->host_irq, (void *)assigned_dev); -- cgit v1.2.3 From d7cff1c37664971aae32bb0de82231ed34933d8e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 16:25:10 +0800 Subject: KVM: Fix INTx for device assignment Missing buckets and wrong parameter for free_irq() Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 277ea7f39fc..d9bbb20f230 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -296,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm, if (irqchip_in_kernel(kvm)) { if (!msi2intx && - adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { - free_irq(adev->host_irq, (void *)kvm); + (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { + free_irq(adev->host_irq, (void *)adev); pci_disable_msi(adev->dev); } -- cgit v1.2.3 From 682edb4c01e690c7c7cd772dbd6f4e0fd74dc572 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 5 Feb 2009 18:23:46 +0000 Subject: KVM: Fix assigned devices circular locking dependency kvm->slots_lock is outer to kvm->lock, so take slots_lock in kvm_vm_ioctl_assign_device() before taking kvm->lock, rather than taking it in kvm_iommu_map_memslots(). Cc: stable@kernel.org Signed-off-by: Mark McLoughlin Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/iommu.c | 6 ++---- virt/kvm/kvm_main.c | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9693a29d00..4c403750360 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) { int i, r = 0; - down_read(&kvm->slots_lock); for (i = 0; i < kvm->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, kvm->memslots[i].npages); if (r) break; } - up_read(&kvm->slots_lock); + return r; } @@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm, static int kvm_iommu_unmap_memslots(struct kvm *kvm) { int i; - down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, kvm->memslots[i].npages); } - up_read(&kvm->slots_lock); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d9bbb20f230..29a667ce35b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -466,6 +466,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + down_read(&kvm->slots_lock); mutex_lock(&kvm->lock); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, @@ -527,6 +528,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, out: mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; out_list_del: list_del(&match->list); @@ -538,6 +540,7 @@ out_put: out_free: kfree(match); mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; } #endif -- cgit v1.2.3