aboutsummaryrefslogtreecommitdiff
path: root/drivers/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/kvm')
-rw-r--r--drivers/kvm/kvm.h4
-rw-r--r--drivers/kvm/kvm_main.c123
-rw-r--r--drivers/kvm/paging_tmpl.h28
-rw-r--r--drivers/kvm/svm.c15
-rw-r--r--drivers/kvm/vmx.c43
-rw-r--r--drivers/kvm/vmx.h1
6 files changed, 174 insertions, 40 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 2db1ca4c680..04574a9d443 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -304,6 +304,7 @@ struct kvm {
int memory_config_version;
int busy;
unsigned long rmap_overflow;
+ struct list_head vm_list;
};
struct kvm_stat {
@@ -340,6 +341,7 @@ struct kvm_arch_ops {
struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ void (*vcpu_decache)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg);
@@ -558,7 +560,7 @@ static inline void load_gs(u16 sel)
#ifndef load_ldt
static inline void load_ldt(u16 sel)
{
- asm ("lldt %0" : : "g"(sel));
+ asm ("lldt %0" : : "rm"(sel));
}
#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 099f0afd394..af866147ff2 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -34,6 +34,8 @@
#include <linux/highmem.h>
#include <linux/file.h>
#include <asm/desc.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
#include "x86_emulate.h"
#include "segment_descriptor.h"
@@ -41,6 +43,9 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static DEFINE_SPINLOCK(kvm_lock);
+static LIST_HEAD(vm_list);
+
struct kvm_arch_ops *kvm_arch_ops;
struct kvm_stat kvm_stat;
EXPORT_SYMBOL_GPL(kvm_stat);
@@ -230,9 +235,13 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
struct kvm_vcpu *vcpu = &kvm->vcpus[i];
mutex_init(&vcpu->mutex);
+ vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
INIT_LIST_HEAD(&vcpu->free_pages);
+ spin_lock(&kvm_lock);
+ list_add(&kvm->vm_list, &vm_list);
+ spin_unlock(&kvm_lock);
}
filp->private_data = kvm;
return 0;
@@ -272,7 +281,9 @@ static void kvm_free_physmem(struct kvm *kvm)
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
- vcpu_load(vcpu->kvm, vcpu_slot(vcpu));
+ if (!vcpu_load(vcpu->kvm, vcpu_slot(vcpu)))
+ return;
+
kvm_mmu_destroy(vcpu);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
@@ -290,6 +301,9 @@ static int kvm_dev_release(struct inode *inode, struct file *filp)
{
struct kvm *kvm = filp->private_data;
+ spin_lock(&kvm_lock);
+ list_del(&kvm->vm_list);
+ spin_unlock(&kvm_lock);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
kfree(kvm);
@@ -544,7 +558,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
FX_IMAGE_ALIGN);
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
- vcpu->cpu = -1; /* First load will set up TR */
r = kvm_arch_ops->vcpu_create(vcpu);
if (r < 0)
goto out_free_vcpus;
@@ -1360,6 +1373,9 @@ static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run)
if (!vcpu)
return -ENOENT;
+ /* re-sync apic's tpr */
+ vcpu->cr8 = kvm_run->cr8;
+
if (kvm_run->emulated) {
kvm_arch_ops->skip_emulated_instruction(vcpu);
kvm_run->emulated = 0;
@@ -2025,6 +2041,64 @@ static struct notifier_block kvm_reboot_notifier = {
.priority = 0,
};
+/*
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
+ * cached on it.
+ */
+static void decache_vcpus_on_cpu(int cpu)
+{
+ struct kvm *vm;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(vm, &vm_list, vm_list)
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &vm->vcpus[i];
+ /*
+ * If the vcpu is locked, then it is running on some
+ * other cpu and therefore it is not cached on the
+ * cpu in question.
+ *
+ * If it's not locked, check the last cpu it executed
+ * on.
+ */
+ if (mutex_trylock(&vcpu->mutex)) {
+ if (vcpu->cpu == cpu) {
+ kvm_arch_ops->vcpu_decache(vcpu);
+ vcpu->cpu = -1;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ }
+ spin_unlock(&kvm_lock);
+}
+
+static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
+ void *v)
+{
+ int cpu = (long)v;
+
+ switch (val) {
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ decache_vcpus_on_cpu(cpu);
+ smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
+ NULL, 0, 1);
+ break;
+ case CPU_UP_PREPARE:
+ smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
+ NULL, 0, 1);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_cpu_notifier = {
+ .notifier_call = kvm_cpu_hotplug,
+ .priority = 20, /* must be > scheduler priority */
+};
+
static __init void kvm_init_debug(void)
{
struct kvm_stats_debugfs_item *p;
@@ -2044,6 +2118,30 @@ static void kvm_exit_debug(void)
debugfs_remove(debugfs_dir);
}
+static int kvm_suspend(struct sys_device *dev, pm_message_t state)
+{
+ decache_vcpus_on_cpu(raw_smp_processor_id());
+ on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1);
+ return 0;
+}
+
+static int kvm_resume(struct sys_device *dev)
+{
+ on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1);
+ return 0;
+}
+
+static struct sysdev_class kvm_sysdev_class = {
+ set_kset_name("kvm"),
+ .suspend = kvm_suspend,
+ .resume = kvm_resume,
+};
+
+static struct sys_device kvm_sysdev = {
+ .id = 0,
+ .cls = &kvm_sysdev_class,
+};
+
hpa_t bad_page_address;
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
@@ -2071,8 +2169,19 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
return r;
on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+ r = register_cpu_notifier(&kvm_cpu_notifier);
+ if (r)
+ goto out_free_1;
register_reboot_notifier(&kvm_reboot_notifier);
+ r = sysdev_class_register(&kvm_sysdev_class);
+ if (r)
+ goto out_free_2;
+
+ r = sysdev_register(&kvm_sysdev);
+ if (r)
+ goto out_free_3;
+
kvm_chardev_ops.owner = module;
r = misc_register(&kvm_dev);
@@ -2084,7 +2193,13 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
return r;
out_free:
+ sysdev_unregister(&kvm_sysdev);
+out_free_3:
+ sysdev_class_unregister(&kvm_sysdev_class);
+out_free_2:
unregister_reboot_notifier(&kvm_reboot_notifier);
+ unregister_cpu_notifier(&kvm_cpu_notifier);
+out_free_1:
on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
return r;
@@ -2093,8 +2208,10 @@ out_free:
void kvm_exit_arch(void)
{
misc_deregister(&kvm_dev);
-
+ sysdev_unregister(&kvm_sysdev);
+ sysdev_class_unregister(&kvm_sysdev_class);
unregister_reboot_notifier(&kvm_reboot_notifier);
+ unregister_cpu_notifier(&kvm_cpu_notifier);
on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
kvm_arch_ops = NULL;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 149fa45fd9a..b6b90e9e130 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -443,31 +443,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
struct guest_walker walker;
- pt_element_t guest_pte;
- gpa_t gpa;
-
- FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
- guest_pte = *walker.ptep;
- FNAME(release_walker)(&walker);
-
- if (!is_present_pte(guest_pte))
- return UNMAPPED_GVA;
-
- if (walker.level == PT_DIRECTORY_LEVEL) {
- ASSERT((guest_pte & PT_PAGE_SIZE_MASK));
- ASSERT(PTTYPE == 64 || is_pse(vcpu));
+ gpa_t gpa = UNMAPPED_GVA;
+ int r;
- gpa = (guest_pte & PT_DIR_BASE_ADDR_MASK) | (vaddr &
- (PT_LEVEL_MASK(PT_PAGE_TABLE_LEVEL) | ~PAGE_MASK));
+ r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
- if (PTTYPE == 32 && is_cpuid_PSE36())
- gpa |= (guest_pte & PT32_DIR_PSE36_MASK) <<
- (32 - PT32_DIR_PSE36_SHIFT);
- } else {
- gpa = (guest_pte & PT_BASE_ADDR_MASK);
- gpa |= (vaddr & ~PAGE_MASK);
+ if (r) {
+ gpa = (gpa_t)walker.gfn << PAGE_SHIFT;
+ gpa |= vaddr & ~PAGE_MASK;
}
+ FNAME(release_walker)(&walker);
return gpa;
}
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 85f61dd1e93..83da4ea150a 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -528,7 +528,13 @@ static void init_vmcb(struct vmcb *vmcb)
save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
save->cs.limit = 0xffff;
- save->cs.base = 0xffff0000;
+ /*
+ * cs.base should really be 0xffff0000, but vmx can't handle that, so
+ * be consistent with it.
+ *
+ * Replace when we have real mode working for vmx.
+ */
+ save->cs.base = 0xf0000;
save->gdtr.limit = 0xffff;
save->idtr.limit = 0xffff;
@@ -603,6 +609,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
put_cpu();
}
+static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
+{
+}
+
static void svm_cache_regs(struct kvm_vcpu *vcpu)
{
vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax;
@@ -723,7 +733,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
vcpu->svm->cr0 = cr0;
- vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK;
+ vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK;
vcpu->cr0 = cr0;
}
@@ -1671,6 +1681,7 @@ static struct kvm_arch_ops svm_arch_ops = {
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
+ .vcpu_decache = svm_vcpu_decache,
.set_guest_debug = svm_guest_debug,
.get_msr = svm_get_msr,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 27e05a77e21..1e640b89917 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -125,6 +125,15 @@ static void __vcpu_clear(void *arg)
per_cpu(current_vmcs, cpu) = NULL;
}
+static void vcpu_clear(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpu != raw_smp_processor_id() && vcpu->cpu != -1)
+ smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1);
+ else
+ __vcpu_clear(vcpu);
+ vcpu->launched = 0;
+}
+
static unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
@@ -202,10 +211,8 @@ static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu)
cpu = get_cpu();
- if (vcpu->cpu != cpu) {
- smp_call_function(__vcpu_clear, vcpu, 0, 1);
- vcpu->launched = 0;
- }
+ if (vcpu->cpu != cpu)
+ vcpu_clear(vcpu);
if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
u8 error;
@@ -243,6 +250,11 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
put_cpu();
}
+static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
+{
+ vcpu_clear(vcpu);
+}
+
static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
return vmcs_readl(GUEST_RFLAGS);
@@ -502,7 +514,7 @@ static __init int vmx_disabled_by_bios(void)
return (msr & 5) == 1; /* locked but not enabled */
}
-static __init void hardware_enable(void *garbage)
+static void hardware_enable(void *garbage)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -1375,6 +1387,11 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
return 1;
}
+static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ return 0;
+}
static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
{
@@ -1635,6 +1652,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
+ [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
@@ -1786,10 +1804,10 @@ again:
"kvm_vmx_return: "
/* Save guest registers, load host registers, keep flags */
#ifdef CONFIG_X86_64
- "xchg %3, 0(%%rsp) \n\t"
+ "xchg %3, (%%rsp) \n\t"
"mov %%rax, %c[rax](%3) \n\t"
"mov %%rbx, %c[rbx](%3) \n\t"
- "pushq 0(%%rsp); popq %c[rcx](%3) \n\t"
+ "pushq (%%rsp); popq %c[rcx](%3) \n\t"
"mov %%rdx, %c[rdx](%3) \n\t"
"mov %%rsi, %c[rsi](%3) \n\t"
"mov %%rdi, %c[rdi](%3) \n\t"
@@ -1804,24 +1822,24 @@ again:
"mov %%r15, %c[r15](%3) \n\t"
"mov %%cr2, %%rax \n\t"
"mov %%rax, %c[cr2](%3) \n\t"
- "mov 0(%%rsp), %3 \n\t"
+ "mov (%%rsp), %3 \n\t"
"pop %%rcx; pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
"pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
"pop %%rbp; pop %%rdi; pop %%rsi;"
"pop %%rdx; pop %%rbx; pop %%rax \n\t"
#else
- "xchg %3, 0(%%esp) \n\t"
+ "xchg %3, (%%esp) \n\t"
"mov %%eax, %c[rax](%3) \n\t"
"mov %%ebx, %c[rbx](%3) \n\t"
- "pushl 0(%%esp); popl %c[rcx](%3) \n\t"
+ "pushl (%%esp); popl %c[rcx](%3) \n\t"
"mov %%edx, %c[rdx](%3) \n\t"
"mov %%esi, %c[rsi](%3) \n\t"
"mov %%edi, %c[rdi](%3) \n\t"
"mov %%ebp, %c[rbp](%3) \n\t"
"mov %%cr2, %%eax \n\t"
"mov %%eax, %c[cr2](%3) \n\t"
- "mov 0(%%esp), %3 \n\t"
+ "mov (%%esp), %3 \n\t"
"pop %%ecx; popa \n\t"
#endif
@@ -1859,9 +1877,7 @@ again:
fx_restore(vcpu->host_fx_image);
vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
-#ifndef CONFIG_X86_64
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
-#endif
/*
* Profile KVM exit RIPs:
@@ -2012,6 +2028,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
+ .vcpu_decache = vmx_vcpu_decache,
.set_guest_debug = set_guest_debug,
.get_msr = vmx_get_msr,
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index 4c0ab151836..d0dc93df411 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -180,6 +180,7 @@ enum vmcs_field {
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_PENDING_INTERRUPT 7