From 890ca9aefa78f7831f8f633cab9e4803636dffe4 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 11 May 2009 16:48:15 +0800 Subject: KVM: Add MCE support The related MSRs are emulated. MCE capability is exported via extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED. A new vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation such as the mcg_cap. MCE is injected via vcpu ioctl command KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are not implemented. Signed-off-by: Huang Ying Signed-off-by: Avi Kivity --- include/linux/kvm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 3db5d8d3748..7b17141c47c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -415,6 +415,9 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#ifdef __KVM_HAVE_MCE +#define KVM_CAP_MCE 31 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -454,6 +457,19 @@ struct kvm_irq_routing { #endif +#ifdef KVM_CAP_MCE +/* x86 MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; +#endif + /* * ioctls for VM fds */ @@ -541,6 +557,10 @@ struct kvm_irq_routing { #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) +/* MCE for x86 */ +#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) +#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) +#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) /* * Deprecated interfaces -- cgit v1.2.3 From 721eecbf4fe995ca94a9edec0c9843b1cc0eaaf3 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 20 May 2009 10:30:49 -0400 Subject: KVM: irqfd KVM provides a complete virtual system environment for guests, including support for injecting interrupts modeled after the real exception/interrupt facilities present on the native platform (such as the IDT on x86). Virtual interrupts can come from a variety of sources (emulated devices, pass-through devices, etc) but all must be injected to the guest via the KVM infrastructure. This patch adds a new mechanism to inject a specific interrupt to a guest using a decoupled eventfd mechnanism: Any legal signal on the irqfd (using eventfd semantics from either userspace or kernel) will translate into an injected interrupt in the guest at the next available interrupt window. Signed-off-by: Gregory Haskins Signed-off-by: Avi Kivity --- include/linux/kvm.h | 11 +++++++++++ include/linux/kvm_host.h | 24 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7b17141c47c..8f53f24e527 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -418,6 +418,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_MCE #define KVM_CAP_MCE 31 #endif +#define KVM_CAP_IRQFD 32 #ifdef KVM_CAP_IRQ_ROUTING @@ -470,6 +471,15 @@ struct kvm_x86_mce { }; #endif +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + +struct kvm_irqfd { + __u32 fd; + __u32 gsi; + __u32 flags; + __u8 pad[20]; +}; + /* * ioctls for VM fds */ @@ -514,6 +524,7 @@ struct kvm_x86_mce { #define KVM_ASSIGN_SET_MSIX_ENTRY \ _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3060bdc35ff..7724dcb6ff7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,12 @@ struct kvm { struct list_head vm_list; struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; +#ifdef CONFIG_HAVE_KVM_EVENTFD + struct { + spinlock_t lock; + struct list_head items; + } irqfds; +#endif struct kvm_vm_stat stat; struct kvm_arch arch; atomic_t users_count; @@ -525,4 +531,22 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +#ifdef CONFIG_HAVE_KVM_EVENTFD + +void kvm_irqfd_init(struct kvm *kvm); +int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); +void kvm_irqfd_release(struct kvm *kvm); + +#else + +static inline void kvm_irqfd_init(struct kvm *kvm) {} +static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +{ + return -EINVAL; +} + +static inline void kvm_irqfd_release(struct kvm *kvm) {} + +#endif /* CONFIG_HAVE_KVM_EVENTFD */ + #endif -- cgit v1.2.3 From c5ff41ce66382d657a76bc06ba252d848826950f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 14 May 2009 22:42:53 +0200 Subject: KVM: Allow PIT emulation without speaker port The in-kernel speaker emulation is only a dummy and also unneeded from the performance point of view. Rather, it takes user space support to generate sound output on the host, e.g. console beeps. To allow this, introduce KVM_CREATE_PIT2 which controls in-kernel speaker port emulation via a flag passed along the new IOCTL. It also leaves room for future extensions of the PIT configuration interface. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8f53f24e527..65df6f5e2b9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -70,6 +70,14 @@ struct kvm_irqchip { } chip; }; +/* for KVM_CREATE_PIT2 */ +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +#define KVM_PIT_SPEAKER_DUMMY 1 + #define KVM_EXIT_UNKNOWN 0 #define KVM_EXIT_EXCEPTION 1 #define KVM_EXIT_IO 2 @@ -419,6 +427,9 @@ struct kvm_trace_rec { #define KVM_CAP_MCE 31 #endif #define KVM_CAP_IRQFD 32 +#ifdef __KVM_HAVE_PIT +#define KVM_CAP_PIT2 33 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -525,6 +536,7 @@ struct kvm_irqfd { _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) /* * ioctls for vcpu fds -- cgit v1.2.3 From e7333391403b31feb27a05bc0dcd052a471f1276 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 21 May 2009 13:50:13 +0800 Subject: KVM: Downsize max support MSI-X entry to 256 We only trap one page for MSI-X entry now, so it's 4k/(128/8) = 256 entries at most. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 65df6f5e2b9..632a8560dbc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -676,7 +676,7 @@ struct kvm_assigned_msix_nr { __u16 padding; }; -#define KVM_MAX_MSIX_PER_DEV 512 +#define KVM_MAX_MSIX_PER_DEV 256 struct kvm_assigned_msix_entry { __u32 assigned_dev_id; __u32 gsi; -- cgit v1.2.3 From b188d2d365702cfc660a56d66f4bf77ebf14a5c2 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Fri, 29 May 2009 12:58:50 +0200 Subject: KVM: remove redundant declarations Changing s390 code in kvm_arch_vcpu_load/put come across this header declarations. They are complete duplicates, not even useful forward declarations as nothing using it is in between (maybe it was that in the past). This patch removes the two dispensable lines. Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7724dcb6ff7..19240feefe6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -249,8 +249,6 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_dev_ioctl_check_extension(long ext); -- cgit v1.2.3 From 60eead79ad8750f80384cbe48fc44edcc78a0305 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 4 Jun 2009 15:08:23 -0300 Subject: KVM: introduce irq_lock, use it to protect ioapic Introduce irq_lock, and use to protect ioapic data structures. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19240feefe6..0c71688b1ee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -124,7 +124,6 @@ struct kvm_kernel_irq_routing_entry { }; struct kvm { - struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; spinlock_t requests_lock; struct rw_semaphore slots_lock; @@ -134,6 +133,7 @@ struct kvm { KVM_PRIVATE_MEM_SLOTS]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; struct list_head vm_list; + struct mutex lock; struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; #ifdef CONFIG_HAVE_KVM_EVENTFD @@ -150,6 +150,7 @@ struct kvm { struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; #endif + struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ struct hlist_head mask_notifier_list; -- cgit v1.2.3 From fa40a8214bb9bcae8d49c234c19d8b4a6c1f37ff Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 4 Jun 2009 15:08:24 -0300 Subject: KVM: switch irq injection/acking data structures to irq_lock Protect irq injection/acking data structures with a separate irq_lock mutex. This fixes the following deadlock: CPU A CPU B kvm_vm_ioctl_deassign_dev_irq() mutex_lock(&kvm->lock); worker_thread() -> kvm_deassign_irq() -> kvm_assigned_dev_interrupt_work_handler() -> deassign_host_irq() mutex_lock(&kvm->lock); -> cancel_work_sync() [blocked] [gleb: fix ia64 path] Reported-by: Alex Williamson Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0c71688b1ee..a29ea030dd8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -371,7 +371,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -- cgit v1.2.3 From 6a4a98397331723dce25a7537270548d91523431 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 9 Jun 2009 11:33:36 +0300 Subject: KVM: Reorder ioctls in kvm.h Somehow the VM ioctls got unsorted; resort. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 632a8560dbc..f1dada0519e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -495,11 +495,6 @@ struct kvm_irqfd { * ioctls for VM fds */ #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) -#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) -#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ - struct kvm_userspace_memory_region) -#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. @@ -507,6 +502,11 @@ struct kvm_irqfd { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ + struct kvm_userspace_memory_region) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -- cgit v1.2.3 From c5af89b68abb26eea5e745f33228f4d672f115e5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:26 +0300 Subject: KVM: Introduce kvm_vcpu_is_bsp() function. Use it instead of open code "vcpu_id zero is BSP" assumption. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a29ea030dd8..a5bd429e9bd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -131,6 +131,7 @@ struct kvm { int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS]; + struct kvm_vcpu *bsp_vcpu; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; struct list_head vm_list; struct mutex lock; @@ -549,4 +550,8 @@ static inline void kvm_irqfd_release(struct kvm *kvm) {} #endif /* CONFIG_HAVE_KVM_EVENTFD */ +static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->bsp_vcpu == vcpu; +} #endif -- cgit v1.2.3 From 73880c80aa9c8dc353cd0ad26579023213cd5314 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:28 +0300 Subject: KVM: Break dependency between vcpu index in vcpus array and vcpu_id. Archs are free to use vcpu_id as they see fit. For x86 it is used as vcpu's apic id. New ioctl is added to configure boot vcpu id that was assumed to be 0 till now. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ include/linux/kvm_host.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f1dada0519e..5037e170a70 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -430,6 +430,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_PIT #define KVM_CAP_PIT2 33 #endif +#define KVM_CAP_SET_BOOT_CPU_ID 34 #ifdef KVM_CAP_IRQ_ROUTING @@ -537,6 +538,7 @@ struct kvm_irqfd { #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) +#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a5bd429e9bd..d3fdf1a738c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -131,8 +131,12 @@ struct kvm { int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS]; +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + u32 bsp_vcpu_id; struct kvm_vcpu *bsp_vcpu; +#endif struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + atomic_t online_vcpus; struct list_head vm_list; struct mutex lock; struct kvm_io_bus mmio_bus; @@ -550,8 +554,10 @@ static inline void kvm_irqfd_release(struct kvm *kvm) {} #endif /* CONFIG_HAVE_KVM_EVENTFD */ +#ifdef CONFIG_KVM_APIC_ARCHITECTURE static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu == vcpu; } #endif +#endif -- cgit v1.2.3 From 988a2cae6a3c0dea6df59808a935a9a697bfc28c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:29 +0300 Subject: KVM: Use macro to iterate over vcpus. [christian: remove unused variables on s390] Signed-off-by: Gleb Natapov Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d3fdf1a738c..c6e4d02067f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -179,6 +179,17 @@ struct kvm { #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) +static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) +{ + smp_rmb(); + return kvm->vcpus[i]; +} + +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ + idx < atomic_read(&kvm->online_vcpus) && vcpup; \ + vcpup = kvm_get_vcpu(kvm, ++idx)) + int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 3f5d18a96577fd78277e08c467041573b9a65eaf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 11 Jun 2009 15:43:28 +0300 Subject: KVM: Return to userspace on emulation failure Instead of mindlessly retrying to execute the instruction, report the failure to userspace. Signed-off-by: Avi Kivity --- include/linux/kvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5037e170a70..671051829da 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -95,6 +95,10 @@ struct kvm_pit_config { #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 #define KVM_EXIT_NMI 16 +#define KVM_EXIT_INTERNAL_ERROR 17 + +/* For KVM_EXIT_INTERNAL_ERROR */ +#define KVM_INTERNAL_ERROR_EMULATION 1 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -181,6 +185,9 @@ struct kvm_run { __u32 data; __u8 is_write; } dcr; + struct { + __u32 suberror; + } internal; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From 54dee9933e8d93589ad63ec3d6be39f1921b0767 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 11 Jun 2009 12:07:44 -0300 Subject: KVM: VMX: conditionally disable 2M pages Disable usage of 2M pages if VMX_EPT_2MB_PAGE_BIT (bit 16) is clear in MSR_IA32_VMX_EPT_VPID_CAP and EPT is enabled. [avi: s/largepages_disabled/largepages_enabled/ to avoid negative logic] Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c6e4d02067f..6988858dc56 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -224,6 +224,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From 229456fc34b1c9031b04f7581e7b755d1cebfe9c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 17 Jun 2009 09:22:14 -0300 Subject: KVM: convert custom marker based tracing to event traces This allows use of the powerful ftrace infrastructure. See Documentation/trace/ for usage information. [avi, stephen: various build fixes] [sheng: fix control register breakage] Signed-off-by: Marcelo Tosatti Signed-off-by: Stephen Rothwell Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/trace/events/kvm.h (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h new file mode 100644 index 00000000000..d74b23d803f --- /dev/null +++ b/include/trace/events/kvm.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_MAIN_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_FILE kvm + +#if defined(__KVM_HAVE_IOAPIC) +TRACE_EVENT(kvm_set_irq, + TP_PROTO(unsigned int gsi), + TP_ARGS(gsi), + + TP_STRUCT__entry( + __field( unsigned int, gsi ) + ), + + TP_fast_assign( + __entry->gsi = gsi; + ), + + TP_printk("gsi %u", __entry->gsi) +); + + +#define kvm_irqchips \ + {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ + {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ + {KVM_IRQCHIP_IOAPIC, "IOAPIC"} + +TRACE_EVENT(kvm_ack_irq, + TP_PROTO(unsigned int irqchip, unsigned int pin), + TP_ARGS(irqchip, pin), + + TP_STRUCT__entry( + __field( unsigned int, irqchip ) + __field( unsigned int, pin ) + ), + + TP_fast_assign( + __entry->irqchip = irqchip; + __entry->pin = pin; + ), + + TP_printk("irqchip %s pin %u", + __print_symbolic(__entry->irqchip, kvm_irqchips), + __entry->pin) +); + + + +#endif /* defined(__KVM_HAVE_IOAPIC) */ +#endif /* _TRACE_KVM_MAIN_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From ec04b2604c3707a46db1d26d98f82b11d0844669 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 19 Jun 2009 15:16:23 +0200 Subject: KVM: Prepare memslot data structures for multiple hugepage sizes [avi: fix build on non-x86] Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6988858dc56..06af936a250 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -103,7 +103,7 @@ struct kvm_memory_slot { struct { unsigned long rmap_pde; int write_count; - } *lpage_info; + } *lpage_info[KVM_NR_PAGE_SIZES - 1]; unsigned long userspace_addr; int user_alloc; }; -- cgit v1.2.3 From 2023a29cbe34139afcea8f65f8aef78c325c5dc0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 18 Jun 2009 11:47:28 -0300 Subject: KVM: remove old KVMTRACE support code Return EOPNOTSUPP for KVM_TRACE_ENABLE/PAUSE/DISABLE ioctls. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm.h | 31 +------------------------------ include/linux/kvm_host.h | 31 ------------------------------- 2 files changed, 1 insertion(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 671051829da..76c640834ea 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -14,7 +14,7 @@ #define KVM_API_VERSION 12 -/* for KVM_TRACE_ENABLE */ +/* for KVM_TRACE_ENABLE, deprecated */ struct kvm_user_trace_setup { __u32 buf_size; /* sub_buffer size of each per-cpu */ __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ @@ -325,35 +325,6 @@ struct kvm_guest_debug { #define KVM_TRC_CYCLE_SIZE 8 #define KVM_TRC_EXTRA_MAX 7 -/* This structure represents a single trace buffer record. */ -struct kvm_trace_rec { - /* variable rec_val - * is split into: - * bits 0 - 27 -> event id - * bits 28 -30 -> number of extra data args of size u32 - * bits 31 -> binary indicator for if tsc is in record - */ - __u32 rec_val; - __u32 pid; - __u32 vcpu_id; - union { - struct { - __u64 timestamp; - __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) timestamp; - struct { - __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } notimestamp; - } u; -}; - -#define TRACE_REC_EVENT_ID(val) \ - (0x0fffffff & (val)) -#define TRACE_REC_NUM_DATA_ARGS(val) \ - (0x70000000 & ((val) << 28)) -#define TRACE_REC_TCS(val) \ - (0x80000000 & ((val) << 31)) - #define KVMIO 0xAE /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 06af936a250..0604d56f6ee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -482,37 +482,6 @@ struct kvm_stats_debugfs_item { extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; -#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 5, d1, d2, d3, d4, d5) -#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 4, d1, d2, d3, d4, 0) -#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 3, d1, d2, d3, 0, 0) -#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 2, d1, d2, 0, 0, 0) -#define KVMTRACE_1D(evt, vcpu, d1, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 1, d1, 0, 0, 0, 0) -#define KVMTRACE_0D(evt, vcpu, name) \ - trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ - vcpu, 0, 0, 0, 0, 0, 0) - -#ifdef CONFIG_KVM_TRACE -int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); -void kvm_trace_cleanup(void); -#else -static inline -int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) -{ - return -EINVAL; -} -#define kvm_trace_cleanup() ((void)0) -#endif - #ifdef KVM_ARCH_WANT_MMU_NOTIFIER static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) { -- cgit v1.2.3 From d3efc8efdbaa81e9db3089810fcd526fccba234d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 17 Jun 2009 10:07:59 -0300 Subject: KVM: use vcpu_id instead of bsp_vcpu pointer in kvm_vcpu_is_bsp Change kvm_vcpu_is_bsp to use vcpu_id instead of bsp_vcpu pointer, which is only initialized at the end of kvm_vm_ioctl_create_vcpu. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0604d56f6ee..4ea42c95053 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -538,7 +538,7 @@ static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_KVM_APIC_ARCHITECTURE static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { - return vcpu->kvm->bsp_vcpu == vcpu; + return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } #endif #endif -- cgit v1.2.3 From 6c474694530f377507f9aca438c17206e051e6e7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Jun 2009 22:24:26 +0300 Subject: KVM: convert bus to slots_lock Use slots_lock to protect device list on the bus. slots_lock is already taken for read everywhere, so we only need to take it for write when registering devices. This is in preparation to removing in_range and kvm->lock around it. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ea42c95053..96c8c0b0192 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -42,6 +42,7 @@ #define KVM_USERSPACE_IRQ_SOURCE_ID 0 +struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; @@ -61,7 +62,9 @@ void kvm_io_bus_init(struct kvm_io_bus *bus); void kvm_io_bus_destroy(struct kvm_io_bus *bus); struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr, int len, int is_write); -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, +void __kvm_io_bus_register_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev); +void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, struct kvm_io_device *dev); struct kvm_vcpu { -- cgit v1.2.3 From bda9020e2463ec94db9f97e8615f3bae22069838 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Jun 2009 22:24:32 +0300 Subject: KVM: remove in_range from io devices This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write functions. in_range now becomes unused so it is removed from device ops in favor of read/write callbacks performing range checks internally. This allows aliasing (mostly for in-kernel virtio), as well as better error handling by making it possible to pass errors up to userspace. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 96c8c0b0192..077e8bb875a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -60,8 +60,10 @@ struct kvm_io_bus { void kvm_io_bus_init(struct kvm_io_bus *bus); void kvm_io_bus_destroy(struct kvm_io_bus *bus); -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, - gpa_t addr, int len, int is_write); +int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len, + const void *val); +int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, + void *val); void __kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, -- cgit v1.2.3 From ae8c1c4025c2b780616586c3f86a3374a154ef90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 1 Jul 2009 12:09:41 +0300 Subject: KVM: Trace irq level and source id Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index d74b23d803f..035232dc84e 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -9,18 +9,23 @@ #if defined(__KVM_HAVE_IOAPIC) TRACE_EVENT(kvm_set_irq, - TP_PROTO(unsigned int gsi), - TP_ARGS(gsi), + TP_PROTO(unsigned int gsi, int level, int irq_source_id), + TP_ARGS(gsi, level, irq_source_id), TP_STRUCT__entry( __field( unsigned int, gsi ) + __field( int, level ) + __field( int, irq_source_id ) ), TP_fast_assign( __entry->gsi = gsi; + __entry->level = level; + __entry->irq_source_id = irq_source_id; ), - TP_printk("gsi %u", __entry->gsi) + TP_printk("gsi %u level %d source %d", + __entry->gsi, __entry->level, __entry->irq_source_id) ); -- cgit v1.2.3 From aec51dc4f1584018d7e35269e04e3dde3d2033e6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 1 Jul 2009 16:01:02 +0300 Subject: KVM: Trace mmio Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 035232dc84e..77022af4849 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -56,6 +56,39 @@ TRACE_EVENT(kvm_ack_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ + +#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 +#define KVM_TRACE_MMIO_READ 1 +#define KVM_TRACE_MMIO_WRITE 2 + +#define kvm_trace_symbol_mmio \ + { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_MMIO_READ, "read" }, \ + { KVM_TRACE_MMIO_WRITE, "write" } + +TRACE_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = val; + ), + + TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_mmio), + __entry->len, __entry->gpa, __entry->val) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 1000ff8d893765d7b56e32fe16dbe4814f172588 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 7 Jul 2009 16:00:57 +0300 Subject: KVM: Add trace points in irqchip code Add tracepoint in msi/ioapic/pic set_irq() functions, in IPI sending and in the point where IRQ is placed into apic's IRR. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 77022af4849..dbe10845527 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -28,6 +28,62 @@ TRACE_EVENT(kvm_set_irq, __entry->gsi, __entry->level, __entry->irq_source_id) ); +#define kvm_deliver_mode \ + {0x0, "Fixed"}, \ + {0x1, "LowPrio"}, \ + {0x2, "SMI"}, \ + {0x3, "Res3"}, \ + {0x4, "NMI"}, \ + {0x5, "INIT"}, \ + {0x6, "SIPI"}, \ + {0x7, "ExtINT"} + +TRACE_EVENT(kvm_ioapic_set_irq, + TP_PROTO(__u64 e, int pin, bool coalesced), + TP_ARGS(e, pin, coalesced), + + TP_STRUCT__entry( + __field( __u64, e ) + __field( int, pin ) + __field( bool, coalesced ) + ), + + TP_fast_assign( + __entry->e = e; + __entry->pin = pin; + __entry->coalesced = coalesced; + ), + + TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s", + __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), + (__entry->e & (1<<11)) ? "logical" : "physical", + (__entry->e & (1<<15)) ? "level" : "edge", + (__entry->e & (1<<16)) ? "|masked" : "", + __entry->coalesced ? " (coalesced)" : "") +); + +TRACE_EVENT(kvm_msi_set_irq, + TP_PROTO(__u64 address, __u64 data), + TP_ARGS(address, data), + + TP_STRUCT__entry( + __field( __u64, address ) + __field( __u64, data ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->data = data; + ), + + TP_printk("dst %u vec %x (%s|%s|%s%s)", + (u8)(__entry->address >> 12), (u8)__entry->data, + __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), + (__entry->address & (1<<2)) ? "logical" : "physical", + (__entry->data & (1<<15)) ? "level" : "edge", + (__entry->address & (1<<3)) ? "|rh" : "") +); #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ -- cgit v1.2.3 From e9f4275732add046fed4a548b8dbb98dbe500d2f Mon Sep 17 00:00:00 2001 From: Beth Kon Date: Tue, 7 Jul 2009 11:50:38 -0400 Subject: KVM: PIT support for HPET legacy mode When kvm is in hpet_legacy_mode, the hpet is providing the timer interrupt and the pit should not be. So in legacy mode, the pit timer is destroyed, but the *state* of the pit is maintained. So if kvm or the guest tries to modify the state of the pit, this modification is accepted, *except* that the timer isn't actually started. When we exit hpet_legacy_mode, the current state of the pit (which is up to date since we've been accepting modifications) is used to restart the pit timer. The saved_mode code in kvm_pit_load_count temporarily changes mode to 0xff in order to destroy the timer, but then restores the actual value, again maintaining "current" state of the pit for possible later reenablement. [avi: add some reserved storage in the ioctl; make SET_PIT2 IOW] [marcelo: fix memory corruption due to reserved storage] Signed-off-by: Beth Kon Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 76c640834ea..a74a1fcc28e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,9 @@ struct kvm_guest_debug { #define KVM_CAP_PIT2 33 #endif #define KVM_CAP_SET_BOOT_CPU_ID 34 +#ifdef __KVM_HAVE_PIT_STATE2 +#define KVM_CAP_PIT_STATE2 35 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -586,6 +589,9 @@ struct kvm_debug_guest { #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) +#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) + #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -- cgit v1.2.3 From 090b7aff27120cdae76a346a70db394844fea598 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Tue, 7 Jul 2009 17:08:44 -0400 Subject: KVM: make io_bus interface more robust Today kvm_io_bus_regsiter_dev() returns void and will internally BUG_ON if it fails. We want to create dynamic MMIO/PIO entries driven from userspace later in the series, so we need to enhance the code to be more robust with the following changes: 1) Add a return value to the registration function 2) Fix up all the callsites to check the return code, handle any failures, and percolate the error up to the caller. 3) Add an unregister function that collapses holes in the array Signed-off-by: Gregory Haskins Acked-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 077e8bb875a..983b0bdeb3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -64,10 +64,14 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len, const void *val); int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val); -void __kvm_io_bus_register_dev(struct kvm_io_bus *bus, +int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev); +int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, + struct kvm_io_device *dev); +void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus, struct kvm_io_device *dev); -void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, - struct kvm_io_device *dev); struct kvm_vcpu { struct kvm *kvm; -- cgit v1.2.3 From d34e6b175e61821026893ec5298cc8e7558df43a Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Tue, 7 Jul 2009 17:08:49 -0400 Subject: KVM: add ioeventfd support ioeventfd is a mechanism to register PIO/MMIO regions to trigger an eventfd signal when written to by a guest. Host userspace can register any arbitrary IO address with a corresponding eventfd and then pass the eventfd to a specific end-point of interest for handling. Normal IO requires a blocking round-trip since the operation may cause side-effects in the emulated model or may return data to the caller. Therefore, an IO in KVM traps from the guest to the host, causes a VMX/SVM "heavy-weight" exit back to userspace, and is ultimately serviced by qemu's device model synchronously before returning control back to the vcpu. However, there is a subclass of IO which acts purely as a trigger for other IO (such as to kick off an out-of-band DMA request, etc). For these patterns, the synchronous call is particularly expensive since we really only want to simply get our notification transmitted asychronously and return as quickly as possible. All the sychronous infrastructure to ensure proper data-dependencies are met in the normal IO case are just unecessary overhead for signalling. This adds additional computational load on the system, as well as latency to the signalling path. Therefore, we provide a mechanism for registration of an in-kernel trigger point that allows the VCPU to only require a very brief, lightweight exit just long enough to signal an eventfd. This also means that any clients compatible with the eventfd interface (which includes userspace and kernelspace equally well) can now register to be notified. The end result should be a more flexible and higher performance notification API for the backend KVM hypervisor and perhipheral components. To test this theory, we built a test-harness called "doorbell". This module has a function called "doorbell_ring()" which simply increments a counter for each time the doorbell is signaled. It supports signalling from either an eventfd, or an ioctl(). We then wired up two paths to the doorbell: One via QEMU via a registered io region and through the doorbell ioctl(). The other is direct via ioeventfd. You can download this test harness here: ftp://ftp.novell.com/dev/ghaskins/doorbell.tar.bz2 The measured results are as follows: qemu-mmio: 110000 iops, 9.09us rtt ioeventfd-mmio: 200100 iops, 5.00us rtt ioeventfd-pio: 367300 iops, 2.72us rtt I didn't measure qemu-pio, because I have to figure out how to register a PIO region with qemu's device model, and I got lazy. However, for now we can extrapolate based on the data from the NULLIO runs of +2.56us for MMIO, and -350ns for HC, we get: qemu-pio: 153139 iops, 6.53us rtt ioeventfd-hc: 412585 iops, 2.37us rtt these are just for fun, for now, until I can gather more data. Here is a graph for your convenience: http://developer.novell.com/wiki/images/7/76/Iofd-chart.png The conclusion to draw is that we save about 4us by skipping the userspace hop. -------------------- Signed-off-by: Gregory Haskins Acked-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm.h | 24 ++++++++++++++++++++++++ include/linux/kvm_host.h | 10 ++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index a74a1fcc28e..230a91aa61c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -307,6 +307,28 @@ struct kvm_guest_debug { struct kvm_guest_debug_arch arch; }; +enum { + kvm_ioeventfd_flag_nr_datamatch, + kvm_ioeventfd_flag_nr_pio, + kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_max, +}; + +#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) +#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) +#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) + +#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) + +struct kvm_ioeventfd { + __u64 datamatch; + __u64 addr; /* legal pio/mmio address */ + __u32 len; /* 1, 2, 4, or 8 bytes */ + __s32 fd; + __u32 flags; + __u8 pad[36]; +}; + #define KVM_TRC_SHIFT 16 /* * kvm trace categories @@ -412,6 +434,7 @@ struct kvm_guest_debug { #ifdef __KVM_HAVE_PIT_STATE2 #define KVM_CAP_PIT_STATE2 35 #endif +#define KVM_CAP_IOEVENTFD 36 #ifdef KVM_CAP_IRQ_ROUTING @@ -520,6 +543,7 @@ struct kvm_irqfd { #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) +#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 983b0bdeb3f..6ec9fc56a49 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -155,6 +155,7 @@ struct kvm { spinlock_t lock; struct list_head items; } irqfds; + struct list_head ioeventfds; #endif struct kvm_vm_stat stat; struct kvm_arch arch; @@ -528,19 +529,24 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_EVENTFD -void kvm_irqfd_init(struct kvm *kvm); +void kvm_eventfd_init(struct kvm *kvm); int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); void kvm_irqfd_release(struct kvm *kvm); +int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #else -static inline void kvm_irqfd_init(struct kvm *kvm) {} +static inline void kvm_eventfd_init(struct kvm *kvm) {} static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) { return -EINVAL; } static inline void kvm_irqfd_release(struct kvm *kvm) {} +static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + return -ENOSYS; +} #endif /* CONFIG_HAVE_KVM_EVENTFD */ -- cgit v1.2.3 From 0b71785dc05f1f66e6268022b9953c0d6a9985c6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 9 Jul 2009 15:33:53 +0300 Subject: KVM: Move kvm_cpu_get_interrupt() declaration to x86 code It is implemented only by x86. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6ec9fc56a49..2c6a5f00874 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -341,7 +341,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); -int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From a1b37100d9e29c1f8dc3e2f5490a205c80180e01 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 9 Jul 2009 15:33:52 +0300 Subject: KVM: Reduce runnability interface with arch support code Remove kvm_cpu_has_interrupt() and kvm_arch_interrupt_allowed() from interface between general code and arch code. kvm_arch_vcpu_runnable() checks for interrupts instead. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c6a5f00874..4af56036a6b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -332,7 +332,6 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); -int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); @@ -341,7 +340,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); -int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b927a3cec081a605142f5b7e90b730611bee28b1 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Jul 2009 10:42:48 +0800 Subject: KVM: VMX: Introduce KVM_SET_IDENTITY_MAP_ADDR ioctl Now KVM allow guest to modify guest's physical address of EPT's identity mapping page. (change from v1, discard unnecessary check, change ioctl to accept parameter address rather than value) Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- include/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 230a91aa61c..f8f8900fc5e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -435,6 +435,7 @@ struct kvm_ioeventfd { #define KVM_CAP_PIT_STATE2 35 #endif #define KVM_CAP_IOEVENTFD 36 +#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 #ifdef KVM_CAP_IRQ_ROUTING @@ -512,6 +513,7 @@ struct kvm_irqfd { #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -- cgit v1.2.3 From 07708c4af1346ab1521b26a202f438366b7bcffd Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 3 Aug 2009 18:43:28 +0200 Subject: KVM: x86: Disallow hypercalls for guest callers in rings > 0 So far unprivileged guest callers running in ring 3 can issue, e.g., MMU hypercalls. Normally, such callers cannot provide any hand-crafted MMU command structure as it has to be passed by its physical address, but they can still crash the guest kernel by passing random addresses. To close the hole, this patch considers hypercalls valid only if issued from guest ring 0. This may still be relaxed on a per-hypercall base in the future once required. Cc: stable@kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 3ddce03766c..d73109243fd 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -13,6 +13,7 @@ #define KVM_ENOSYS 1000 #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG +#define KVM_EPERM EPERM #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 -- cgit v1.2.3 From da18acffc3f13c4f187c20bc5dcfcb110b374b48 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Aug 2009 15:59:25 +0300 Subject: KVM: export kvm_para.h kvm_para.h contains userspace interface and so should be exported. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/asm-generic/Kbuild.asm | 5 +++++ include/linux/Kbuild | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 290910e4ede..96d7c9804dc 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -3,6 +3,11 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \ header-y += kvm.h endif +ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \ + $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),) +header-y += kvm_para.h +endif + ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \ $(srctree)/include/asm-$(SRCARCH)/a.out.h),) unifdef-y += a.out.h diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 334a3593cdf..cff4a101f26 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -268,6 +268,10 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \ $(srctree)/include/asm-$(SRCARCH)/kvm.h),) unifdef-y += kvm.h endif +ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \ + $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),) +unifdef-y += kvm_para.h +endif unifdef-y += llc.h unifdef-y += loop.h unifdef-y += lp.h -- cgit v1.2.3