diff options
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/kvm.h | 4 | ||||
-rw-r--r-- | drivers/kvm/kvm_main.c | 123 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 28 | ||||
-rw-r--r-- | drivers/kvm/svm.c | 15 | ||||
-rw-r--r-- | drivers/kvm/vmx.c | 43 | ||||
-rw-r--r-- | drivers/kvm/vmx.h | 1 |
6 files changed, 174 insertions, 40 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 2db1ca4c680..04574a9d443 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -304,6 +304,7 @@ struct kvm { int memory_config_version; int busy; unsigned long rmap_overflow; + struct list_head vm_list; }; struct kvm_stat { @@ -340,6 +341,7 @@ struct kvm_arch_ops { struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*vcpu_decache)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); @@ -558,7 +560,7 @@ static inline void load_gs(u16 sel) #ifndef load_ldt static inline void load_ldt(u16 sel) { - asm ("lldt %0" : : "g"(sel)); + asm ("lldt %0" : : "rm"(sel)); } #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 099f0afd394..af866147ff2 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -34,6 +34,8 @@ #include <linux/highmem.h> #include <linux/file.h> #include <asm/desc.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> #include "x86_emulate.h" #include "segment_descriptor.h" @@ -41,6 +43,9 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static DEFINE_SPINLOCK(kvm_lock); +static LIST_HEAD(vm_list); + struct kvm_arch_ops *kvm_arch_ops; struct kvm_stat kvm_stat; EXPORT_SYMBOL_GPL(kvm_stat); @@ -230,9 +235,13 @@ static int kvm_dev_open(struct inode *inode, struct file *filp) struct kvm_vcpu *vcpu = &kvm->vcpus[i]; mutex_init(&vcpu->mutex); + vcpu->cpu = -1; vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; INIT_LIST_HEAD(&vcpu->free_pages); + spin_lock(&kvm_lock); + list_add(&kvm->vm_list, &vm_list); + spin_unlock(&kvm_lock); } filp->private_data = kvm; return 0; @@ -272,7 +281,9 @@ static void kvm_free_physmem(struct kvm *kvm) static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - vcpu_load(vcpu->kvm, vcpu_slot(vcpu)); + if (!vcpu_load(vcpu->kvm, vcpu_slot(vcpu))) + return; + kvm_mmu_destroy(vcpu); vcpu_put(vcpu); kvm_arch_ops->vcpu_free(vcpu); @@ -290,6 +301,9 @@ static int kvm_dev_release(struct inode *inode, struct file *filp) { struct kvm *kvm = filp->private_data; + spin_lock(&kvm_lock); + list_del(&kvm->vm_list); + spin_unlock(&kvm_lock); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); kfree(kvm); @@ -544,7 +558,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n) FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cpu = -1; /* First load will set up TR */ r = kvm_arch_ops->vcpu_create(vcpu); if (r < 0) goto out_free_vcpus; @@ -1360,6 +1373,9 @@ static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) if (!vcpu) return -ENOENT; + /* re-sync apic's tpr */ + vcpu->cr8 = kvm_run->cr8; + if (kvm_run->emulated) { kvm_arch_ops->skip_emulated_instruction(vcpu); kvm_run->emulated = 0; @@ -2025,6 +2041,64 @@ static struct notifier_block kvm_reboot_notifier = { .priority = 0, }; +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. + */ +static void decache_vcpus_on_cpu(int cpu) +{ + struct kvm *vm; + struct kvm_vcpu *vcpu; + int i; + + spin_lock(&kvm_lock); + list_for_each_entry(vm, &vm_list, vm_list) + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = &vm->vcpus[i]; + /* + * If the vcpu is locked, then it is running on some + * other cpu and therefore it is not cached on the + * cpu in question. + * + * If it's not locked, check the last cpu it executed + * on. + */ + if (mutex_trylock(&vcpu->mutex)) { + if (vcpu->cpu == cpu) { + kvm_arch_ops->vcpu_decache(vcpu); + vcpu->cpu = -1; + } + mutex_unlock(&vcpu->mutex); + } + } + spin_unlock(&kvm_lock); +} + +static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, + void *v) +{ + int cpu = (long)v; + + switch (val) { + case CPU_DEAD: + case CPU_UP_CANCELED: + decache_vcpus_on_cpu(cpu); + smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, + NULL, 0, 1); + break; + case CPU_UP_PREPARE: + smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, + NULL, 0, 1); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_cpu_notifier = { + .notifier_call = kvm_cpu_hotplug, + .priority = 20, /* must be > scheduler priority */ +}; + static __init void kvm_init_debug(void) { struct kvm_stats_debugfs_item *p; @@ -2044,6 +2118,30 @@ static void kvm_exit_debug(void) debugfs_remove(debugfs_dir); } +static int kvm_suspend(struct sys_device *dev, pm_message_t state) +{ + decache_vcpus_on_cpu(raw_smp_processor_id()); + on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); + return 0; +} + +static int kvm_resume(struct sys_device *dev) +{ + on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1); + return 0; +} + +static struct sysdev_class kvm_sysdev_class = { + set_kset_name("kvm"), + .suspend = kvm_suspend, + .resume = kvm_resume, +}; + +static struct sys_device kvm_sysdev = { + .id = 0, + .cls = &kvm_sysdev_class, +}; + hpa_t bad_page_address; int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) @@ -2071,8 +2169,19 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) return r; on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); + r = register_cpu_notifier(&kvm_cpu_notifier); + if (r) + goto out_free_1; register_reboot_notifier(&kvm_reboot_notifier); + r = sysdev_class_register(&kvm_sysdev_class); + if (r) + goto out_free_2; + + r = sysdev_register(&kvm_sysdev); + if (r) + goto out_free_3; + kvm_chardev_ops.owner = module; r = misc_register(&kvm_dev); @@ -2084,7 +2193,13 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) return r; out_free: + sysdev_unregister(&kvm_sysdev); +out_free_3: + sysdev_class_unregister(&kvm_sysdev_class); +out_free_2: unregister_reboot_notifier(&kvm_reboot_notifier); + unregister_cpu_notifier(&kvm_cpu_notifier); +out_free_1: on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); return r; @@ -2093,8 +2208,10 @@ out_free: void kvm_exit_arch(void) { misc_deregister(&kvm_dev); - + sysdev_unregister(&kvm_sysdev); + sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); + unregister_cpu_notifier(&kvm_cpu_notifier); on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); kvm_arch_ops = NULL; diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 149fa45fd9a..b6b90e9e130 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -443,31 +443,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) { struct guest_walker walker; - pt_element_t guest_pte; - gpa_t gpa; - - FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); - guest_pte = *walker.ptep; - FNAME(release_walker)(&walker); - - if (!is_present_pte(guest_pte)) - return UNMAPPED_GVA; - - if (walker.level == PT_DIRECTORY_LEVEL) { - ASSERT((guest_pte & PT_PAGE_SIZE_MASK)); - ASSERT(PTTYPE == 64 || is_pse(vcpu)); + gpa_t gpa = UNMAPPED_GVA; + int r; - gpa = (guest_pte & PT_DIR_BASE_ADDR_MASK) | (vaddr & - (PT_LEVEL_MASK(PT_PAGE_TABLE_LEVEL) | ~PAGE_MASK)); + r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); - if (PTTYPE == 32 && is_cpuid_PSE36()) - gpa |= (guest_pte & PT32_DIR_PSE36_MASK) << - (32 - PT32_DIR_PSE36_SHIFT); - } else { - gpa = (guest_pte & PT_BASE_ADDR_MASK); - gpa |= (vaddr & ~PAGE_MASK); + if (r) { + gpa = (gpa_t)walker.gfn << PAGE_SHIFT; + gpa |= vaddr & ~PAGE_MASK; } + FNAME(release_walker)(&walker); return gpa; } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 85f61dd1e93..83da4ea150a 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -528,7 +528,13 @@ static void init_vmcb(struct vmcb *vmcb) save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - save->cs.base = 0xffff0000; + /* + * cs.base should really be 0xffff0000, but vmx can't handle that, so + * be consistent with it. + * + * Replace when we have real mode working for vmx. + */ + save->cs.base = 0xf0000; save->gdtr.limit = 0xffff; save->idtr.limit = 0xffff; @@ -603,6 +609,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) put_cpu(); } +static void svm_vcpu_decache(struct kvm_vcpu *vcpu) +{ +} + static void svm_cache_regs(struct kvm_vcpu *vcpu) { vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; @@ -723,7 +733,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif vcpu->svm->cr0 = cr0; - vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK; + vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK; vcpu->cr0 = cr0; } @@ -1671,6 +1681,7 @@ static struct kvm_arch_ops svm_arch_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, + .vcpu_decache = svm_vcpu_decache, .set_guest_debug = svm_guest_debug, .get_msr = svm_get_msr, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 27e05a77e21..1e640b89917 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -125,6 +125,15 @@ static void __vcpu_clear(void *arg) per_cpu(current_vmcs, cpu) = NULL; } +static void vcpu_clear(struct kvm_vcpu *vcpu) +{ + if (vcpu->cpu != raw_smp_processor_id() && vcpu->cpu != -1) + smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); + else + __vcpu_clear(vcpu); + vcpu->launched = 0; +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -202,10 +211,8 @@ static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu) cpu = get_cpu(); - if (vcpu->cpu != cpu) { - smp_call_function(__vcpu_clear, vcpu, 0, 1); - vcpu->launched = 0; - } + if (vcpu->cpu != cpu) + vcpu_clear(vcpu); if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { u8 error; @@ -243,6 +250,11 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) put_cpu(); } +static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) +{ + vcpu_clear(vcpu); +} + static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { return vmcs_readl(GUEST_RFLAGS); @@ -502,7 +514,7 @@ static __init int vmx_disabled_by_bios(void) return (msr & 5) == 1; /* locked but not enabled */ } -static __init void hardware_enable(void *garbage) +static void hardware_enable(void *garbage) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -1375,6 +1387,11 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, return 1; } +static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) { @@ -1635,6 +1652,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = { [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, [EXIT_REASON_IO_INSTRUCTION] = handle_io, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, @@ -1786,10 +1804,10 @@ again: "kvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ #ifdef CONFIG_X86_64 - "xchg %3, 0(%%rsp) \n\t" + "xchg %3, (%%rsp) \n\t" "mov %%rax, %c[rax](%3) \n\t" "mov %%rbx, %c[rbx](%3) \n\t" - "pushq 0(%%rsp); popq %c[rcx](%3) \n\t" + "pushq (%%rsp); popq %c[rcx](%3) \n\t" "mov %%rdx, %c[rdx](%3) \n\t" "mov %%rsi, %c[rsi](%3) \n\t" "mov %%rdi, %c[rdi](%3) \n\t" @@ -1804,24 +1822,24 @@ again: "mov %%r15, %c[r15](%3) \n\t" "mov %%cr2, %%rax \n\t" "mov %%rax, %c[cr2](%3) \n\t" - "mov 0(%%rsp), %3 \n\t" + "mov (%%rsp), %3 \n\t" "pop %%rcx; pop %%r15; pop %%r14; pop %%r13; pop %%r12;" "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" "pop %%rbp; pop %%rdi; pop %%rsi;" "pop %%rdx; pop %%rbx; pop %%rax \n\t" #else - "xchg %3, 0(%%esp) \n\t" + "xchg %3, (%%esp) \n\t" "mov %%eax, %c[rax](%3) \n\t" "mov %%ebx, %c[rbx](%3) \n\t" - "pushl 0(%%esp); popl %c[rcx](%3) \n\t" + "pushl (%%esp); popl %c[rcx](%3) \n\t" "mov %%edx, %c[rdx](%3) \n\t" "mov %%esi, %c[rsi](%3) \n\t" "mov %%edi, %c[rdi](%3) \n\t" "mov %%ebp, %c[rbp](%3) \n\t" "mov %%cr2, %%eax \n\t" "mov %%eax, %c[cr2](%3) \n\t" - "mov 0(%%esp), %3 \n\t" + "mov (%%esp), %3 \n\t" "pop %%ecx; popa \n\t" #endif @@ -1859,9 +1877,7 @@ again: fx_restore(vcpu->host_fx_image); vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; -#ifndef CONFIG_X86_64 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); -#endif /* * Profile KVM exit RIPs: @@ -2012,6 +2028,7 @@ static struct kvm_arch_ops vmx_arch_ops = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, + .vcpu_decache = vmx_vcpu_decache, .set_guest_debug = set_guest_debug, .get_msr = vmx_get_msr, diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index 4c0ab151836..d0dc93df411 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h @@ -180,6 +180,7 @@ enum vmcs_field { #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_PENDING_INTERRUPT 7 |