From b286d5d8b0836e76832dafcc5a18b0e8e5a3bc5e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:05 +0100 Subject: KVM: SVM: Implement hsave Implement the hsave MSR, that gives the VCPU a GPA to save the old guest state in. v2 allows userspace to save/restore hsave v4 dummys out the hsave MSR, so we use a host page v6 remembers the guest's hsave and exports the MSR Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 758b7a155ae..99165a961f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -456,7 +456,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; -- cgit v1.2.3 From d80174745ba3938bc6abb8f95ed670ac0631a182 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:11 +0100 Subject: KVM: SVM: Only allow setting of EFER_SVME when CPUID SVM is set Userspace has to tell the kernel module somehow that nested SVM should be used. The easiest way that doesn't break anything I could think of is to implement if (cpuid & svm) allow write to efer else deny write to efer Old userspaces mask the SVM capability bit, so they don't break. In order to find out that the SVM capability is set, I had to split the kvm_emulate_cpuid into a finding and an emulating part. (introduced in v6) Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99165a961f0..b5e9932e0f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -69,6 +69,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -173,6 +175,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 error_code) { ++vcpu->stat.pf_guest; + if (vcpu->arch.exception.pending) { if (vcpu->arch.exception.nr == PF_VECTOR) { printk(KERN_DEBUG "kvm: inject_page_fault:" @@ -442,6 +445,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. @@ -481,6 +489,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } + if (efer & EFER_SVME) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { + printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + kvm_x86_ops->set_efer(vcpu, efer); efer &= ~EFER_LMA; @@ -1181,11 +1200,6 @@ out: return r; } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index) { @@ -1228,7 +1242,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word3_x86_features = bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); const u32 kvm_supported_word6_x86_features = - bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); + bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | + bit(X86_FEATURE_SVM); /* all func 2 cpuid_count() should be called on the same cpu */ get_cpu(); @@ -2832,20 +2847,15 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, return 1; } -void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index) { int i; - u32 function, index; - struct kvm_cpuid_entry2 *e, *best; + struct kvm_cpuid_entry2 *best = NULL; - function = kvm_register_read(vcpu, VCPU_REGS_RAX); - index = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_register_write(vcpu, VCPU_REGS_RAX, 0); - kvm_register_write(vcpu, VCPU_REGS_RBX, 0); - kvm_register_write(vcpu, VCPU_REGS_RCX, 0); - kvm_register_write(vcpu, VCPU_REGS_RDX, 0); - best = NULL; for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { + struct kvm_cpuid_entry2 *e; + e = &vcpu->arch.cpuid_entries[i]; if (is_matching_cpuid_entry(e, function, index)) { if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) @@ -2860,6 +2870,22 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) if (!best || e->function > best->function) best = e; } + + return best; +} + +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + u32 function, index; + struct kvm_cpuid_entry2 *best; + + function = kvm_register_read(vcpu, VCPU_REGS_RAX); + index = kvm_register_read(vcpu, VCPU_REGS_RCX); + kvm_register_write(vcpu, VCPU_REGS_RAX, 0); + kvm_register_write(vcpu, VCPU_REGS_RBX, 0); + kvm_register_write(vcpu, VCPU_REGS_RCX, 0); + kvm_register_write(vcpu, VCPU_REGS_RDX, 0); + best = kvm_find_cpuid_entry(vcpu, function, index); if (best) { kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); -- cgit v1.2.3 From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: New guest debug interface This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic part, controlling the "main switch" and the single-step feature. The arch specific part adds an x86 interface for intercepting both types of debug exceptions separately and re-injecting them when the host was not interested. Moveover, the foundation for guest debugging via debug registers is layed. To signal breakpoint events properly back to userland, an arch-specific data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block contains the PC, the debug exception, and relevant debug registers to tell debug events properly apart. The availability of this new interface is signaled by KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are provided. Note that both SVM and VTX are supported, but only the latter was tested yet. Based on the experience with all those VTX corner case, I would be fairly surprised if SVM will work out of the box. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5e9932e0f6..e990d164b56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3005,9 +3005,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - if (vcpu->guest_debug.enabled) - kvm_x86_ops->guest_debug_pre(vcpu); - vcpu->guest_mode = 1; /* * Make sure that guest_mode assignment won't happen after @@ -3218,7 +3215,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * Don't leak debug flags in case they were set for guest debugging */ - if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); vcpu_put(vcpu); @@ -3837,8 +3834,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { int r; @@ -3846,6 +3843,11 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_queue_exception(vcpu, DB_VECTOR); + else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + kvm_queue_exception(vcpu, BP_VECTOR); + vcpu_put(vcpu); return r; -- cgit v1.2.3 From 42dbaa5a057736bf8b5c22aa42dbe975bf1080e5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: x86: Virtualize debug registers So far KVM only had basic x86 debug register support, once introduced to realize guest debugging that way. The guest itself was not able to use those registers. This patch now adds (almost) full support for guest self-debugging via hardware registers. It refactors the code, moving generic parts out of SVM (VMX was already cleaned up by the KVM_SET_GUEST_DEBUG patches), and it ensures that the registers are properly switched between host and guest. This patch also prepares debug register usage by the host. The latter will (once wired-up by the following patch) allow for hardware breakpoints/watchpoints in guest code. If this is enabled, the guest will only see faked debug registers without functionality, but with content reflecting the guest's modifications. Tested on Intel only, but SVM /should/ work as well, but who knows... Known limitations: Trapping on tss switch won't work - most probably on Intel. Credits also go to Joerg Roedel - I used his once posted debugging series as platform for this patch. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e990d164b56..300bc4d42ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3025,10 +3025,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_guest_enter(); + get_debugreg(vcpu->arch.host_dr6, 6); + get_debugreg(vcpu->arch.host_dr7, 7); + if (unlikely(vcpu->arch.switch_db_regs)) { + get_debugreg(vcpu->arch.host_db[0], 0); + get_debugreg(vcpu->arch.host_db[1], 1); + get_debugreg(vcpu->arch.host_db[2], 2); + get_debugreg(vcpu->arch.host_db[3], 3); + + set_debugreg(0, 7); + set_debugreg(vcpu->arch.eff_db[0], 0); + set_debugreg(vcpu->arch.eff_db[1], 1); + set_debugreg(vcpu->arch.eff_db[2], 2); + set_debugreg(vcpu->arch.eff_db[3], 3); + } KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); + if (unlikely(vcpu->arch.switch_db_regs)) { + set_debugreg(0, 7); + set_debugreg(vcpu->arch.host_db[0], 0); + set_debugreg(vcpu->arch.host_db[1], 1); + set_debugreg(vcpu->arch.host_db[2], 2); + set_debugreg(vcpu->arch.host_db[3], 3); + } + set_debugreg(vcpu->arch.host_dr6, 6); + set_debugreg(vcpu->arch.host_dr7, 7); + vcpu->guest_mode = 0; local_irq_enable(); @@ -4035,6 +4059,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = false; + vcpu->arch.switch_db_regs = 0; + memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); + vcpu->arch.dr6 = DR6_FIXED_1; + vcpu->arch.dr7 = DR7_FIXED_1; + return kvm_x86_ops->vcpu_reset(vcpu); } -- cgit v1.2.3 From ae675ef01cd86014acf8da5dee87876b71122495 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: x86: Wire-up hardware breakpoints for guest debugging Add the remaining bits to make use of debug registers also for guest debugging, thus enabling the use of hardware breakpoints and watchpoints. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 300bc4d42ab..2477e87b2f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3861,10 +3861,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int r; + int i, r; vcpu_load(vcpu); + if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { + for (i = 0; i < KVM_NR_DB_REGS; ++i) + vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; + vcpu->arch.switch_db_regs = + (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); + } else { + for (i = 0; i < KVM_NR_DB_REGS; i++) + vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); + } + r = kvm_x86_ops->set_guest_debug(vcpu, dbg); if (dbg->control & KVM_GUESTDBG_INJECT_DB) -- cgit v1.2.3 From 2f0b3d60b2c43aef7cd10169c425c052169c622a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 21 Dec 2008 19:27:36 +0200 Subject: KVM: MMU: Segregate mmu pages created with different cr4.pge settings Don't allow a vcpu with cr4.pge cleared to use a shadow page created with cr4.pge set; this might cause a cr3 switch not to sync ptes that have the global bit set (the global bit has no effect if !cr4.pge). This can only occur on smp with different cr4.pge settings for different vcpus (since a cr4 change will resync the shadow ptes), but there's no cost to being correct here. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2477e87b2f8..873602b5edf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,6 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE); kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } -- cgit v1.2.3 From 53f658b3c33616a4997ee254311b335e59063289 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 11 Dec 2008 20:45:05 +0100 Subject: KVM: VMX: initialize TSC offset relative to vm creation time VMX initializes the TSC offset for each vcpu at different times, and also reinitializes it for vcpus other than 0 on APIC SIPI message. This bug causes the TSC's to appear unsynchronized in the guest, even if the host is good. Older Linux kernels don't handle the situation very well, so gettimeofday is likely to go backwards in time: http://www.mail-archive.com/kvm@vger.kernel.org/msg02955.html http://sourceforge.net/tracker/index.php?func=detail&aid=2025534&group_id=180599&atid=893831 Fix it by initializating the offset of each vcpu relative to vm creation time, and moving it from vmx_vcpu_reset to vmx_vcpu_setup, out of the APIC MP init path. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 873602b5edf..3b2acfd72d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4170,6 +4170,8 @@ struct kvm *kvm_arch_create_vm(void) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + rdtscll(kvm->arch.vm_init_tsc); + return kvm; } -- cgit v1.2.3 From 77c2002e7c6f019f59a6f3cc5f8b16b41748dbe1 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 29 Dec 2008 01:42:19 +0200 Subject: KVM: introduce kvm_read_guest_virt, kvm_write_guest_virt This commit change the name of emulator_read_std into kvm_read_guest_virt, and add new function name kvm_write_guest_virt that allow writing into a guest virtual address. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b2acfd72d7..67f91764e99 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1976,10 +1976,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } -int emulator_read_std(unsigned long addr, - void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) +int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; @@ -1987,27 +1985,57 @@ int emulator_read_std(unsigned long addr, while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); - unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); + unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; if (gpa == UNMAPPED_GVA) { r = X86EMUL_PROPAGATE_FAULT; goto out; } - ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); + ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); if (ret < 0) { r = X86EMUL_UNHANDLEABLE; goto out; } - bytes -= tocopy; - data += tocopy; - addr += tocopy; + bytes -= toread; + data += toread; + addr += toread; } out: return r; } -EXPORT_SYMBOL_GPL(emulator_read_std); + +int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) +{ + void *data = val; + int r = X86EMUL_CONTINUE; + + while (bytes) { + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); + unsigned offset = addr & (PAGE_SIZE-1); + unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); + int ret; + + if (gpa == UNMAPPED_GVA) { + r = X86EMUL_PROPAGATE_FAULT; + goto out; + } + ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); + if (ret < 0) { + r = X86EMUL_UNHANDLEABLE; + goto out; + } + + bytes -= towrite; + data += towrite; + addr += towrite; + } +out: + return r; +} + static int emulator_read_emulated(unsigned long addr, void *val, @@ -2029,8 +2057,8 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (emulator_read_std(addr, val, bytes, vcpu) - == X86EMUL_CONTINUE) + if (kvm_read_guest_virt(addr, val, bytes, vcpu) + == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; @@ -2233,7 +2261,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); + kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); @@ -2241,7 +2269,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); static struct x86_emulate_ops emulate_ops = { - .read_std = emulator_read_std, + .read_std = kvm_read_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, -- cgit v1.2.3 From 0f346074403bc109f9569f14b45cb09e83729032 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 29 Dec 2008 01:42:20 +0200 Subject: KVM: remove the vmap usage vmap() on guest pages hides those pages from the Linux mm for an extended (userspace determined) amount of time. Get rid of it. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 62 ++++++++++++------------------------------------------ 1 file changed, 13 insertions(+), 49 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 67f91764e99..2a4f3a69734 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2371,40 +2371,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); -static void free_pio_guest_pages(struct kvm_vcpu *vcpu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i) - if (vcpu->arch.pio.guest_pages[i]) { - kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]); - vcpu->arch.pio.guest_pages[i] = NULL; - } -} - static int pio_copy_data(struct kvm_vcpu *vcpu) { void *p = vcpu->arch.pio_data; - void *q; + gva_t q = vcpu->arch.pio.guest_gva; unsigned bytes; - int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1; + int ret; - q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE, - PAGE_KERNEL); - if (!q) { - free_pio_guest_pages(vcpu); - return -ENOMEM; - } - q += vcpu->arch.pio.guest_page_offset; bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; if (vcpu->arch.pio.in) - memcpy(q, p, bytes); + ret = kvm_write_guest_virt(q, p, bytes, vcpu); else - memcpy(p, q, bytes); - q -= vcpu->arch.pio.guest_page_offset; - vunmap(q); - free_pio_guest_pages(vcpu); - return 0; + ret = kvm_read_guest_virt(q, p, bytes, vcpu); + return ret; } int complete_pio(struct kvm_vcpu *vcpu) @@ -2515,7 +2494,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 0; vcpu->arch.pio.down = 0; - vcpu->arch.pio.guest_page_offset = 0; vcpu->arch.pio.rep = 0; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2543,9 +2521,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i, ret = 0; - int nr_pages = 1; - struct page *page; + int ret = 0; struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; @@ -2557,7 +2533,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 1; vcpu->arch.pio.down = down; - vcpu->arch.pio.guest_page_offset = offset_in_page(address); vcpu->arch.pio.rep = rep; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2577,15 +2552,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, else in_page = offset_in_page(address) + size; now = min(count, (unsigned long)in_page / size); - if (!now) { - /* - * String I/O straddles page boundary. Pin two guest pages - * so that we satisfy atomicity constraints. Do just one - * transaction to avoid complexity. - */ - nr_pages = 2; + if (!now) now = 1; - } if (down) { /* * String I/O in reverse. Yuck. Kill the guest, fix later. @@ -2600,15 +2568,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) kvm_x86_ops->skip_emulated_instruction(vcpu); - for (i = 0; i < nr_pages; ++i) { - page = gva_to_page(vcpu, address + i * PAGE_SIZE); - vcpu->arch.pio.guest_pages[i] = page; - if (!page) { - kvm_inject_gp(vcpu, 0); - free_pio_guest_pages(vcpu); - return 1; - } - } + vcpu->arch.pio.guest_gva = address; pio_dev = vcpu_find_pio_dev(vcpu, port, vcpu->arch.pio.cur_count, @@ -2616,7 +2576,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); - if (ret >= 0 && pio_dev) { + if (ret == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (ret == 0 && pio_dev) { pio_string_write(pio_dev, vcpu); complete_pio(vcpu); if (vcpu->arch.pio.count == 0) -- cgit v1.2.3 From 61a6bd672bda3b9468bf5895c1be085c4e481138 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 29 Dec 2008 17:32:28 +0200 Subject: KVM: Fallback support for MSR_VM_HSAVE_PA Since we advertise MSR_VM_HSAVE_PA, userspace will attempt to read it even on Intel. Implement fake support for this MSR to avoid the warnings. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a4f3a69734..c3fbe8c55c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -742,6 +742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: + case MSR_VM_HSAVE_PA: break; case 0x200 ... 0x2ff: return set_msr_mtrr(vcpu, msr, data); @@ -863,6 +864,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: + case MSR_VM_HSAVE_PA: data = 0; break; case MSR_MTRRcap: -- cgit v1.2.3 From 52d939a0bf44081bc9f69b4fbdc9e7f416df27c7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 30 Dec 2008 15:55:06 -0200 Subject: KVM: PIT: provide an option to disable interrupt reinjection Certain clocks (such as TSC) in older 2.6 guests overaccount for lost ticks, causing severe time drift. Interrupt reinjection magnifies the problem. Provide an option to disable it. [avi: allow room for expansion in case we want to disable reinjection of other timers] Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3fbe8c55c1..a1f14611f4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -993,6 +993,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: + case KVM_CAP_REINJECT_CONTROL: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1728,6 +1729,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) return r; } +static int kvm_vm_ioctl_reinject(struct kvm *kvm, + struct kvm_reinject_control *control) +{ + if (!kvm->arch.vpit) + return -ENXIO; + kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -1925,6 +1935,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_REINJECT_CONTROL: { + struct kvm_reinject_control control; + r = -EFAULT; + if (copy_from_user(&control, argp, sizeof(control))) + goto out; + r = kvm_vm_ioctl_reinject(kvm, &control); + if (r) + goto out; + r = 0; + break; + } default: ; } -- cgit v1.2.3 From 269e05e48502f1cc06802e9fba90f5100dd6bb0d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 15:21:42 +0200 Subject: KVM: Properly lock PIT creation Otherwise, two threads can create a PIT in parallel and cause a memory leak. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1f14611f4b..6fbc3460337 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1837,10 +1837,16 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; break; case KVM_CREATE_PIT: + mutex_lock(&kvm->lock); + r = -EEXIST; + if (kvm->arch.vpit) + goto create_pit_unlock; r = -ENOMEM; kvm->arch.vpit = kvm_create_pit(kvm); if (kvm->arch.vpit) r = 0; + create_pit_unlock: + mutex_unlock(&kvm->lock); break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; -- cgit v1.2.3 From 5a41accd3f69c6ef1d58f0c148980bae70515937 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 11 Jan 2009 17:19:35 +0200 Subject: KVM: MMU: Only enable cr4_pge role in shadow mode Two dimensional paging is only confused by it. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6fbc3460337..19ccde621cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,7 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; - vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE); + vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } -- cgit v1.2.3 From 193554750441d91e127dd5066b8aebe0f769101c Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 14 Jan 2009 16:56:00 +0000 Subject: KVM: x86: Fix typos and whitespace errors Some typos, comments, whitespace errors corrected in the cpuid code Signed-off-by: Amit Shah Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19ccde621cd..141a0166e51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1067,7 +1067,7 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; @@ -1165,8 +1165,8 @@ out: } static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1185,8 +1185,8 @@ out: } static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1195,7 +1195,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, goto out; r = -EFAULT; if (copy_to_user(entries, &vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; return 0; @@ -1205,12 +1205,12 @@ out: } static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index) + u32 index) { entry->function = function; entry->index = index; cpuid_count(entry->function, entry->index, - &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); entry->flags = 0; } @@ -1249,7 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | bit(X86_FEATURE_SVM); - /* all func 2 cpuid_count() should be called on the same cpu */ + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); ++*nent; @@ -1323,7 +1323,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG; @@ -1340,7 +1340,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[0].eax; for (func = 1; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -E2BIG; if (nent >= cpuid->nent) goto out_free; @@ -1349,10 +1349,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -EFAULT; if (copy_to_user(entries, cpuid_entries, - nent * sizeof(struct kvm_cpuid_entry2))) + nent * sizeof(struct kvm_cpuid_entry2))) goto out_free; cpuid->nent = nent; r = 0; @@ -1496,7 +1496,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; break; @@ -1509,7 +1509,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; r = -EFAULT; @@ -2864,7 +2864,7 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) return 0; if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && - !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) + !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) return 0; return 1; } @@ -2892,7 +2892,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, if (!best || e->function > best->function) best = e; } - return best; } -- cgit v1.2.3 From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 19 Nov 2008 13:58:46 +0200 Subject: KVM: Userspace controlled irq routing Currently KVM has a static routing from GSI numbers to interrupts (namely, 0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to the IOAPIC). This is insufficient for several reasons: - HPET requires non 1:1 mapping for the timer interrupt - MSIs need a new method to assign interrupt numbers and dispatch them - ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the ioapics This patch implements an interrupt routing table (as a linked list, but this can be easily changed) and a userspace interface to replace the table. The routing table is initialized according to the current hardwired mapping. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 141a0166e51..32e3a7ec6ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp, } } else goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vpic); + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_CREATE_PIT: mutex_lock(&kvm->lock); -- cgit v1.2.3 From 1b2fd70c4eddef53f32639296818c0253e7ca48d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Feb 2009 16:23:51 +0100 Subject: KVM: Add FFXSR support AMD K10 CPUs implement the FFXSR feature that gets enabled using EFER. Let's check if the virtual CPU description includes that CPUID feature bit and allow enabling it then. This is required for Windows Server 2008 in Hyper-V mode. v2 adds CPUID capability exposure Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 32e3a7ec6ad..8f83590b47d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -490,6 +490,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } + if (efer & EFER_FFXSR) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { + printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + if (efer & EFER_SVME) { struct kvm_cpuid_entry2 *feat; @@ -1240,6 +1251,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #ifdef CONFIG_X86_64 bit(X86_FEATURE_LM) | #endif + bit(X86_FEATURE_FXSR_OPT) | bit(X86_FEATURE_MMXEXT) | bit(X86_FEATURE_3DNOWEXT) | bit(X86_FEATURE_3DNOW); -- cgit v1.2.3 From c807660407a695f390034e402edfe544a1d2e40c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 4 Feb 2009 17:52:04 +0100 Subject: KVM: Fix kvmclock on !constant_tsc boxes kvmclock currently falls apart on machines without constant tsc. This patch fixes it. Changes: * keep tsc frequency in a per-cpu variable. * handle kvmclock update using a new request flag, thus checking whenever we need an update each time we enter guest context. * use a cpufreq notifier to track frequency changes and force kvmclock updates. * send ipis to kick cpu out of guest context if needed to make sure the guest doesn't see stale values. Signed-off-by: Gerd Hoffmann Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 94 insertions(+), 9 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f83590b47d..05d7be89b5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info * hv_clock->tsc_to_system_mul); } +static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { - kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = tsc_khz; + if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { + kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); } /* Keep irq disabled to prevent changes to the clock */ @@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static int kvm_request_guest_time_update(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + + if (!vcpu->time_page) + return 0; + set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); + return 1; +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = NULL; } - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); break; } default: @@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; - case KVM_CAP_CLOCKSOURCE: - r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); - break; default: r = 0; break; @@ -1098,7 +1109,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +static void bounce_off(void *info) +{ + /* nothing */ +} + +static unsigned int ref_freq; +static unsigned long tsc_khz_ref; + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i, send_ipi = 0; + + if (!ref_freq) + ref_freq = freq->old; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->cpu != freq->cpu) + continue; + if (!kvm_request_guest_time_update(vcpu)) + continue; + if (vcpu->cpu != smp_processor_id()) + send_ipi++; + } + } + spin_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* + * We upscale the frequency. Must make the guest + * doesn't see old kvmclock values while running with + * the new frequency, otherwise we risk the guest sees + * time go backwards. + * + * In case we update the frequency for another cpu + * (which might be in guest context) send an interrupt + * to kick the cpu out of guest context. Next time + * guest context is entered kvmclock will be updated, + * so the guest will not see stale values. + */ + smp_call_function_single(freq->cpu, bounce_off, NULL, 1); + } + return 0; +} + +static struct notifier_block kvmclock_cpufreq_notifier_block = { + .notifier_call = kvmclock_cpufreq_notifier +}; + int kvm_arch_init(void *opaque) { - int r; + int r, cpu; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + return 0; out: @@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) + kvm_write_guest_time(vcpu); if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) kvm_mmu_sync_roots(vcpu); if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) -- cgit v1.2.3 From 4925663a079c77d95d8685228ad6675fc5639c8e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 Feb 2009 17:28:14 +0200 Subject: KVM: Report IRQ injection status to userspace. IRQ injection status is either -1 (if there was no CPU found that should except the interrupt because IRQ was masked or ioapic was misconfigured or ...) or >= 0 in that case the number indicates to how many CPUs interrupt was injected. If the value is 0 it means that the interrupt was coalesced and probably should be reinjected. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d7be89b5e..e4db5be7c95 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1019,6 +1019,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: case KVM_CAP_REINJECT_CONTROL: + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1877,6 +1878,7 @@ long kvm_arch_vm_ioctl(struct file *filp, create_pit_unlock: mutex_unlock(&kvm->lock); break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -1884,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; -- cgit v1.2.3 From cded19f396bc3c9d299331709d0a9fbc6ecc148a Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sat, 21 Feb 2009 02:19:13 +0100 Subject: KVM: fix sparse warnings: Should it be static? Impact: Make symbols static. Fix this sparse warnings: arch/x86/kvm/mmu.c:992:5: warning: symbol 'mmu_pages_add' was not declared. Should it be static? arch/x86/kvm/mmu.c:1124:5: warning: symbol 'mmu_pages_next' was not declared. Should it be static? arch/x86/kvm/mmu.c:1144:6: warning: symbol 'mmu_pages_clear_parents' was not declared. Should it be static? arch/x86/kvm/x86.c:2037:5: warning: symbol 'kvm_read_guest_virt' was not declared. Should it be static? arch/x86/kvm/x86.c:2067:5: warning: symbol 'kvm_write_guest_virt' was not declared. Should it be static? virt/kvm/irq_comm.c:220:5: warning: symbol 'setup_routing_entry' was not declared. Should it be static? Signed-off-by: Hannes Eder Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4db5be7c95..8ca100a9eca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2043,8 +2043,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } -int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu) +static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; @@ -2073,8 +2073,8 @@ out: return r; } -int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu) +static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; -- cgit v1.2.3