From 2384d2b32640839a4d4d260ca7c5aa4edbf68d91 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 17 Jan 2008 15:14:33 +0800 Subject: KVM: VMX: Enable Virtual Processor Identification (VPID) To allow TLB entries to be retained across VM entry and VM exit, the VMM can now identify distinct address spaces through a new virtual-processor ID (VPID) field of the VMCS. [avi: drop vpid_sync_all()] [avi: add "cc" to asm constraints] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e1462880d1..1157e8a4059 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -37,6 +37,9 @@ MODULE_LICENSE("GPL"); static int bypass_guest_pf = 1; module_param(bypass_guest_pf, bool, 0); +static int enable_vpid = 1; +module_param(enable_vpid, bool, 0); + struct vmcs { u32 revision_id; u32 abort; @@ -71,6 +74,7 @@ struct vcpu_vmx { unsigned rip; } irq; } rmode; + int vpid; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -86,6 +90,9 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; +static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); +static DEFINE_SPINLOCK(vmx_vpid_lock); + static struct vmcs_config { int size; int order; @@ -204,6 +211,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) (irqchip_in_kernel(kvm))); } +static inline int cpu_has_vmx_vpid(void) +{ + return (vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VPID); +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -214,6 +227,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) return -1; } +static inline void __invvpid(int ext, u16 vpid, gva_t gva) +{ + struct { + u64 vpid : 16; + u64 rsvd : 48; + u64 gva; + } operand = { vpid, 0, gva }; + + asm volatile (ASM_VMX_INVVPID + /* CF==1 or ZF==1 --> rc = -1 */ + "; ja 1f ; ud2 ; 1:" + : : "a"(&operand), "c"(ext) : "cc", "memory"); +} + static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -257,6 +284,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx) vmx->launched = 0; } +static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) +{ + if (vmx->vpid == 0) + return; + + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -490,6 +525,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu->cpu != cpu) { vcpu_clear(vmx); kvm_migrate_apic_timer(vcpu); + vpid_sync_vcpu_all(vmx); } if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { @@ -971,7 +1007,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { min = 0; opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_WBINVD_EXITING | + SECONDARY_EXEC_ENABLE_VPID; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) return -EIO; @@ -1239,6 +1276,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + vpid_sync_vcpu_all(to_vmx(vcpu)); +} + static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; @@ -1275,6 +1317,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + vmx_flush_tlb(vcpu); vmcs_writel(GUEST_CR3, cr3); if (vcpu->arch.cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); @@ -1494,6 +1537,22 @@ out: return r; } +static void allocate_vpid(struct vcpu_vmx *vmx) +{ + int vpid; + + vmx->vpid = 0; + if (!enable_vpid || !cpu_has_vmx_vpid()) + return; + spin_lock(&vmx_vpid_lock); + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); + if (vpid < VMX_NR_VPIDS) { + vmx->vpid = vpid; + __set_bit(vpid, vmx_vpid_bitmap); + } + spin_unlock(&vmx_vpid_lock); +} + /* * Sets up the vmcs for emulated real mode. */ @@ -1532,6 +1591,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + if (vmx->vpid == 0) + exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } @@ -1704,6 +1765,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx->vpid != 0) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + vmx->vcpu.arch.cr0 = 0x60000010; vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ vmx_set_cr4(&vmx->vcpu, 0); @@ -1713,6 +1777,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); + vpid_sync_vcpu_all(vmx); + return 0; out: @@ -2221,10 +2287,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) -{ -} - static void update_tpr_threshold(struct kvm_vcpu *vcpu) { int max_irr, tpr; @@ -2489,6 +2551,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + spin_lock(&vmx_vpid_lock); + if (vmx->vpid != 0) + __clear_bit(vmx->vpid, vmx_vpid_bitmap); + spin_unlock(&vmx_vpid_lock); vmx_free_vmcs(vcpu); kfree(vmx->host_msrs); kfree(vmx->guest_msrs); @@ -2505,6 +2571,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); + allocate_vpid(vmx); + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) goto free_vcpu; @@ -2652,6 +2720,8 @@ static int __init vmx_init(void) memset(iova, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) goto out1; -- cgit v1.2.3 From 50a37eb4e05efaa7bac6a948fd4db1a48c728b99 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 31 Jan 2008 14:57:38 +0100 Subject: KVM: align valid EFER bits with the features of the host system This patch aligns the bits the guest can set in the EFER register with the features in the host processor. Currently it lets EFER.NX disabled if the processor does not support it and enables EFER.LME and EFER.LMA only for KVM on 64 bit hosts. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1157e8a4059..a509910f6b5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1117,6 +1117,10 @@ static __init int hardware_setup(void) { if (setup_vmcs_config(&vmcs_config) < 0) return -EIO; + + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + return alloc_kvm_area(); } -- cgit v1.2.3 From 9f62e19a1107466b9e9501e23a9dd5acb81fdca1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 31 Jan 2008 14:57:39 +0100 Subject: KVM: VMX: unifdef the EFER specific code To allow access to the EFER register in 32bit KVM the EFER specific code has to be exported to the x86 generic code. This patch does this in a backwards compatible manner. [avi: add check for EFER-less hosts] Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a509910f6b5..76944f2c883 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1335,14 +1335,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vcpu->arch.cr4 = cr4; } -#ifdef CONFIG_X86_64 - static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); vcpu->arch.shadow_efer = efer; + if (!msr) + return; if (efer & EFER_LMA) { vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) | @@ -1359,8 +1359,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) setup_msrs(vmx); } -#endif - static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1775,9 +1773,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.cr0 = 0x60000010; vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ vmx_set_cr4(&vmx->vcpu, 0); -#ifdef CONFIG_X86_64 vmx_set_efer(&vmx->vcpu, 0); -#endif vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); @@ -2668,9 +2664,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, .set_cr4 = vmx_set_cr4, -#ifdef CONFIG_X86_64 .set_efer = vmx_set_efer, -#endif .get_idt = vmx_get_idt, .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, -- cgit v1.2.3 From 2e11384c2c6f1ce662b1e5b05ba49b216a052f2a Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Mon, 11 Feb 2008 10:26:38 -0600 Subject: KVM: VMX: fix typo in VMX header define Looking at Intel Volume 3b, page 148, table 20-11 and noticed that the field name is 'Deliver' not 'Deliever'. Attached patch changes the define name and its user in vmx.c Signed-off-by: Ryan Harper Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 76944f2c883..2d5ccec3f9e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -632,7 +632,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, nr | INTR_TYPE_EXCEPTION - | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0) + | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | INTR_INFO_VALID_MASK); if (has_error_code) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); @@ -1935,7 +1935,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) error_code = 0; rip = vmcs_readl(GUEST_RIP); - if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) + if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { cr2 = vmcs_readl(EXIT_QUALIFICATION); @@ -2351,7 +2351,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); - if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) + if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, vmcs_read32(IDT_VECTORING_ERROR_CODE)); if (unlikely(has_ext_irq)) -- cgit v1.2.3 From a5f61300c489e334ddf99781a13a7f8d4b580781 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Feb 2008 17:57:21 +0200 Subject: KVM: Use x86's segment descriptor struct instead of private definition The x86 desc_struct unification allows us to remove segment_descriptor.h. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d5ccec3f9e..f46ad03c252 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -17,7 +17,6 @@ #include "irq.h" #include "vmx.h" -#include "segment_descriptor.h" #include "mmu.h" #include @@ -388,7 +387,7 @@ static void reload_tss(void) * VT restores TR but not its size. Useless. */ struct descriptor_table gdt; - struct segment_descriptor *descs; + struct desc_struct *descs; get_gdt(&gdt); descs = (void *)gdt.base; -- cgit v1.2.3 From 2d3ad1f40c841bd3e97d30d423eea53915d085dc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 Feb 2008 11:20:43 +0200 Subject: KVM: Prefix control register accessors with kvm_ to avoid namespace pollution Names like 'set_cr3()' look dangerously close to affecting the host. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f46ad03c252..50345032974 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1683,7 +1683,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - set_cr8(&vmx->vcpu, 0); + kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (vmx->vcpu.vcpu_id == 0) msr |= MSR_IA32_APICBASE_BSP; @@ -2026,22 +2026,22 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) switch (cr) { case 0: vcpu_load_rsp_rip(vcpu); - set_cr0(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 3: vcpu_load_rsp_rip(vcpu); - set_cr3(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 4: vcpu_load_rsp_rip(vcpu); - set_cr4(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; case 8: vcpu_load_rsp_rip(vcpu); - set_cr8(vcpu, vcpu->arch.regs[reg]); + kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); if (irqchip_in_kernel(vcpu->kvm)) return 1; @@ -2067,14 +2067,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; case 8: vcpu_load_rsp_rip(vcpu); - vcpu->arch.regs[reg] = get_cr8(vcpu); + vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); vcpu_put_rsp_rip(vcpu); skip_emulated_instruction(vcpu); return 1; } break; case 3: /* lmsw */ - lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); + kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); skip_emulated_instruction(vcpu); return 1; -- cgit v1.2.3 From b8688d51bbe4872fbcec751e04369606082ac610 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 3 Mar 2008 12:59:56 -0800 Subject: KVM: replace remaining __FUNCTION__ occurances __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50345032974..7ef710afceb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1254,7 +1254,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { printk(KERN_DEBUG "%s: tss fixup for long mode. \n", - __FUNCTION__); + __func__); vmcs_write32(GUEST_TR_AR_BYTES, (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); @@ -1909,7 +1909,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if ((vect_info & VECTORING_INFO_VALID_MASK) && !is_page_fault(intr_info)) printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); + "intr info 0x%x\n", __func__, vect_info, intr_info); if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { int irq = vect_info & VECTORING_INFO_VECTOR_MASK; @@ -2275,7 +2275,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && exit_reason != EXIT_REASON_EXCEPTION_NMI) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __FUNCTION__, exit_reason); + "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); -- cgit v1.2.3 From 019960ae9933161c2809fa4ee608ba30d9639fd2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 Mar 2008 10:44:51 +0200 Subject: KVM: VMX: Don't adjust tsc offset forward Most Intel hosts have a stable tsc, and playing with the offset only reduces accuracy. By limiting tsc offset adjustment only to forward updates, we effectively disable tsc offset adjustment on these hosts. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7ef710afceb..fb0389d3a8d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -519,7 +519,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx->vmcs); - u64 tsc_this, delta; + u64 tsc_this, delta, new_offset; if (vcpu->cpu != cpu) { vcpu_clear(vmx); @@ -559,8 +559,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * Make sure the time stamp counter is monotonous. */ rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; - vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); + if (tsc_this < vcpu->arch.host_tsc) { + delta = vcpu->arch.host_tsc - tsc_this; + new_offset = vmcs_read64(TSC_OFFSET) + delta; + vmcs_write64(TSC_OFFSET, new_offset); + } } } -- cgit v1.2.3 From 4c9fc8ef501790732ed035585b491756b75ea4c6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Mar 2008 18:15:14 +0200 Subject: KVM: VMX: Add module option to disable flexpriority Useful for debugging. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fb0389d3a8d..01559311df8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -39,6 +39,9 @@ module_param(bypass_guest_pf, bool, 0); static int enable_vpid = 1; module_param(enable_vpid, bool, 0); +static int flexpriority_enabled = 1; +module_param(flexpriority_enabled, bool, 0); + struct vmcs { u32 revision_id; u32 abort; @@ -200,8 +203,9 @@ static inline int cpu_has_secondary_exec_ctrls(void) static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { - return (vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + return flexpriority_enabled + && (vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); } static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) -- cgit v1.2.3 From 2e4d2653497856b102c90153f970c9e344ba96c6 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 24 Mar 2008 19:38:34 +0200 Subject: KVM: x86: add functions to get the cpl of vcpu Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 01559311df8..9b560325b12 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1395,6 +1395,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->unusable = (ar >> 16) & 1; } +static int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + struct kvm_segment kvm_seg; + + if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ + return 0; + + if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ + return 3; + + vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + return kvm_seg.selector & 3; +} + static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; @@ -2665,6 +2679,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_segment_base = vmx_get_segment_base, .get_segment = vmx_get_segment, .set_segment = vmx_set_segment, + .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, -- cgit v1.2.3 From 37817f2982d0f559f90cecc66e150dd9d2c2df05 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 24 Mar 2008 23:14:53 +0200 Subject: KVM: x86: hardware task switching support This emulates the x86 hardware task switch mechanism in software, as it is unsupported by either vmx or svm. It allows operating systems which use it, like freedos, to run as kvm guests. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b560325b12..cbca46acfac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2249,6 +2249,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + unsigned long exit_qualification; + u16 tss_selector; + int reason; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + reason = (u32)exit_qualification >> 30; + tss_selector = exit_qualification; + + return kvm_task_switch(vcpu, tss_selector, reason); +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2271,6 +2285,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_TASK_SWITCH] = handle_task_switch, }; static const int kvm_vmx_max_exit_handlers = -- cgit v1.2.3 From 25c5f225beda4fbea878ed8b6203ab4ecc7de2d1 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 28 Mar 2008 13:18:56 +0800 Subject: KVM: VMX: Enable MSR Bitmap feature MSR Bitmap controls whether the accessing of an MSR causes VM Exit. Eliminating exits on automatically saved and restored MSRs yields a small performance gain. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cbca46acfac..87eee7a7f16 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -91,6 +91,7 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; +static struct page *vmx_msr_bitmap; static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -185,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static inline int cpu_has_vmx_msr_bitmap(void) +{ + return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); +} + static inline int cpu_has_vmx_tpr_shadow(void) { return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); @@ -1001,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING; opt = CPU_BASED_TPR_SHADOW | + CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) @@ -1575,6 +1582,30 @@ static void allocate_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } +void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) +{ + void *va; + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + va = kmap(msr_bitmap); + if (msr <= 0x1fff) { + __clear_bit(msr, va + 0x000); /* read-low */ + __clear_bit(msr, va + 0x800); /* write-low */ + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + __clear_bit(msr, va + 0x400); /* read-high */ + __clear_bit(msr, va + 0xc00); /* write-high */ + } + kunmap(msr_bitmap); +} + /* * Sets up the vmcs for emulated real mode. */ @@ -1592,6 +1623,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ @@ -2728,7 +2762,7 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - void *iova; + void *va; int r; vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); @@ -2741,30 +2775,48 @@ static int __init vmx_init(void) goto out; } + vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_msr_bitmap) { + r = -ENOMEM; + goto out1; + } + /* * Allow direct access to the PC debug port (it is often used for I/O * delays, but the vmexits simply slow things down). */ - iova = kmap(vmx_io_bitmap_a); - memset(iova, 0xff, PAGE_SIZE); - clear_bit(0x80, iova); + va = kmap(vmx_io_bitmap_a); + memset(va, 0xff, PAGE_SIZE); + clear_bit(0x80, va); kunmap(vmx_io_bitmap_a); - iova = kmap(vmx_io_bitmap_b); - memset(iova, 0xff, PAGE_SIZE); + va = kmap(vmx_io_bitmap_b); + memset(va, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); + va = kmap(vmx_msr_bitmap); + memset(va, 0xff, PAGE_SIZE); + kunmap(vmx_msr_bitmap); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) - goto out1; + goto out2; + + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); + vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); return 0; +out2: + __free_page(vmx_msr_bitmap); out1: __free_page(vmx_io_bitmap_b); out: @@ -2774,6 +2826,7 @@ out: static void __exit vmx_exit(void) { + __free_page(vmx_msr_bitmap); __free_page(vmx_io_bitmap_b); __free_page(vmx_io_bitmap_a); -- cgit v1.2.3 From 3200f405a1e8e06c8634f11d33614455baa4e6be Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 29 Mar 2008 20:17:59 -0300 Subject: KVM: MMU: unify slots_lock usage Unify slots_lock acquision around vcpu_run(). This is simpler and less error-prone. Also fix some callsites that were not grabbing the lock properly. [avi: drop slots_lock while in guest mode to avoid holding the lock for indefinite periods] Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 87eee7a7f16..6249810b215 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1505,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm) int ret = 0; int r; - down_read(&kvm->slots_lock); r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -1528,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm) ret = 1; out: - up_read(&kvm->slots_lock); return ret; } @@ -1730,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) u64 msr; int ret; + down_read(&vcpu->kvm->slots_lock); if (!init_rmode_tss(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; @@ -1833,9 +1832,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vpid_sync_vcpu_all(vmx); - return 0; + ret = 0; out: + up_read(&vcpu->kvm->slots_lock); return ret; } -- cgit v1.2.3 From 2714d1d3d6be882b97cd0125140fccf9976a460a Mon Sep 17 00:00:00 2001 From: "Feng (Eric) Liu" Date: Thu, 10 Apr 2008 15:31:10 -0400 Subject: KVM: Add trace markers Trace markers allow userspace to trace execution of a virtual machine in order to monitor its performance. Signed-off-by: Feng (Eric) Liu Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6249810b215..8e5d6645b90 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1843,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); + KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); + if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = irq; @@ -1993,6 +1995,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { cr2 = vmcs_readl(EXIT_QUALIFICATION); + KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, + (u32)((u64)cr2 >> 32), handler); return kvm_mmu_page_fault(vcpu, cr2, error_code); } @@ -2021,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { ++vcpu->stat.irq_exits; + KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler); return 1; } @@ -2078,6 +2083,8 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], + (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); switch (cr) { case 0: vcpu_load_rsp_rip(vcpu); @@ -2110,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.cr0 &= ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); vmx_fpu_activate(vcpu); + KVMTRACE_0D(CLTS, vcpu, handler); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ @@ -2118,12 +2126,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load_rsp_rip(vcpu); vcpu->arch.regs[reg] = vcpu->arch.cr3; vcpu_put_rsp_rip(vcpu); + KVMTRACE_3D(CR_READ, vcpu, (u32)cr, + (u32)vcpu->arch.regs[reg], + (u32)((u64)vcpu->arch.regs[reg] >> 32), + handler); skip_emulated_instruction(vcpu); return 1; case 8: vcpu_load_rsp_rip(vcpu); vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); vcpu_put_rsp_rip(vcpu); + KVMTRACE_2D(CR_READ, vcpu, (u32)cr, + (u32)vcpu->arch.regs[reg], handler); skip_emulated_instruction(vcpu); return 1; } @@ -2169,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) val = 0; } vcpu->arch.regs[reg] = val; + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { /* mov to dr */ } @@ -2193,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } + KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + /* FIXME: handling of bits 32:63 of rax, rdx */ vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; @@ -2206,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); + KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + if (vmx_set_msr(vcpu, ecx, data) != 0) { kvm_inject_gp(vcpu, 0); return 1; @@ -2230,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + + KVMTRACE_0D(PEND_INTR, vcpu, handler); + /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -2272,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_read64(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; + KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); + er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); if (er != EMULATE_DONE) { @@ -2335,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 vectoring_info = vmx->idt_vectoring_info; + KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), + (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); + if (unlikely(vmx->fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2416,6 +2445,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) return; } + KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); @@ -2601,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); + } } static void vmx_free_vmcs(struct kvm_vcpu *vcpu) -- cgit v1.2.3