From f0b85051d0f0891378a83db22c0786f1d756fbff Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:01 +0100
Subject: KVM: SVM: Clean up VINTR setting

The current VINTR intercept setters don't look clean to me. To make
the code easier to read and enable the possibilty to trap on a VINTR
set, this uses a helper function to set the VINTR intercept.

v2 uses two distinct functions for setting and clearing the bit

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e25..33407d95761 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -718,6 +718,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+	svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
 	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -1380,7 +1390,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 {
 	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
 
-	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -1593,7 +1603,7 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
 	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
 		/* unable to deliver irq, set pending irq */
-		vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 		goto out;
 	}
@@ -1652,9 +1662,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 */
 	if (!svm->vcpu.arch.interrupt_window_open &&
 	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
-		control->intercept |= 1ULL << INTERCEPT_VINTR;
-	 else
-		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+		svm_set_vintr(svm);
+	else
+		svm_clear_vintr(svm);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
-- 
cgit v1.2.3


From 9962d032bbff0268f22068787831405f8468c8b4 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:02 +0100
Subject: KVM: SVM: Move EFER and MSR constants to generic x86 code

MSR_EFER_SVME_MASK, MSR_VM_CR and MSR_VM_HSAVE_PA are set in KVM
specific headers. Linux does have nice header files to collect
EFER bits and MSR IDs, so IMHO we should put them there.

While at it, I also changed the naming scheme to match that
of the other defines.

(introduced in v6)

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h  | 1 +
 arch/x86/include/asm/msr-index.h | 7 +++++++
 arch/x86/include/asm/svm.h       | 4 ----
 arch/x86/include/asm/virtext.h   | 2 +-
 arch/x86/kvm/svm.c               | 6 +++---
 5 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2f..2998efe8927 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/msr-index.h>
 
 #define KVM_MAX_VCPUS 16
 #define KVM_MEMORY_SLOTS 32
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae0..46e9646e7a6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,13 @@
 #define _EFER_LME		8  /* Long mode enable */
 #define _EFER_LMA		10 /* Long mode active (read-only) */
 #define _EFER_NX		11 /* No execute enable */
+#define _EFER_SVME		12 /* Enable virtualization */
 
 #define EFER_SCE		(1<<_EFER_SCE)
 #define EFER_LME		(1<<_EFER_LME)
 #define EFER_LMA		(1<<_EFER_LMA)
 #define EFER_NX			(1<<_EFER_NX)
+#define EFER_SVME		(1<<_EFER_SVME)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
@@ -360,4 +362,9 @@
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
 
+/* AMD-V MSRs */
+
+#define MSR_VM_CR                       0xc0010114
+#define MSR_VM_HSAVE_PA                 0xc0010117
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e86..82ada75f3eb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
 #define SVM_CPUID_FEATURE_SHIFT 2
 #define SVM_CPUID_FUNC 0x8000000a
 
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR       0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
 #define SVM_VM_CR_SVM_DISABLE 4
 
 #define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 59363627523..e0f9aa16358 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
 
 	wrmsrl(MSR_VM_HSAVE_PA, 0);
 	rdmsrl(MSR_EFER, efer);
-	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+	wrmsrl(MSR_EFER, efer & ~EFER_SVME);
 }
 
 /** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 33407d95761..e4eb3fd91b9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -198,7 +198,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	if (!npt_enabled && !(efer & EFER_LMA))
 		efer &= ~EFER_LME;
 
-	to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
 	vcpu->arch.shadow_efer = efer;
 }
 
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	rdmsrl(MSR_EFER, efer);
-	wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -559,7 +559,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-	save->efer = MSR_EFER_SVME_MASK;
+	save->efer = EFER_SVME;
 	save->dr6 = 0xffff0ff0;
 	save->dr7 = 0x400;
 	save->rflags = 2;
-- 
cgit v1.2.3


From c0725420cfdcf6dd9705b164a8c6cba86684080d Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:03 +0100
Subject: KVM: SVM: Add helper functions for nested SVM

These are helpers for the nested SVM implementation.

- nsvm_printk implements a debug printk variant
- nested_svm_do calls a handler that can accesses gpa-based memory

v3 makes use of the new permission checker
v6 changes:
- streamline nsvm_debug()
- remove printk(KERN_ERR)
- SVME check before CPL check
- give GP error code
- use new EFER constant

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e4eb3fd91b9..87debdcd1b9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -50,6 +50,15 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+/* Turn on to get debugging output*/
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
 /* enable NPT for AMD64 and X86 with PAE */
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static bool npt_enabled = true;
@@ -1149,6 +1158,82 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+	if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+	    || !is_paging(&svm->vcpu)) {
+		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (svm->vmcb->save.cpl) {
+		kvm_inject_gp(&svm->vcpu, 0);
+		return 1;
+	}
+
+       return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+	struct page *page;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+
+	if (is_error_page(page)) {
+		printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+		       __func__, gpa);
+		kvm_release_page_clean(page);
+		kvm_inject_gp(&svm->vcpu, 0);
+		return NULL;
+	}
+	return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+			 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+			 int (*handler)(struct vcpu_svm *svm,
+					void *arg1,
+					void *arg2,
+					void *opaque))
+{
+	struct page *arg1_page;
+	struct page *arg2_page = NULL;
+	void *arg1;
+	void *arg2 = NULL;
+	int retval;
+
+	arg1_page = nested_svm_get_page(svm, arg1_gpa);
+	if(arg1_page == NULL)
+		return 1;
+
+	if (arg2_gpa) {
+		arg2_page = nested_svm_get_page(svm, arg2_gpa);
+		if(arg2_page == NULL) {
+			kvm_release_page_clean(arg1_page);
+			return 1;
+		}
+	}
+
+	arg1 = kmap_atomic(arg1_page, KM_USER0);
+	if (arg2_gpa)
+		arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+	retval = handler(svm, arg1, arg2, opaque);
+
+	kunmap_atomic(arg1, KM_USER0);
+	if (arg2_gpa)
+		kunmap_atomic(arg2, KM_USER1);
+
+	kvm_release_page_dirty(arg1_page);
+	if (arg2_gpa)
+		kvm_release_page_dirty(arg2_page);
+
+	return retval;
+}
+
 static int invalid_op_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-- 
cgit v1.2.3


From 1371d90460189d02bf1bcca19dbfe6bd10dc6031 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:04 +0100
Subject: KVM: SVM: Implement GIF, clgi and stgi

This patch implements the GIF flag and the clgi and stgi instructions that
set this flag. Only if the flag is set (default), interrupts can be received by
the CPU.

To keep the information about that somewhere, this patch adds a new hidden
flags vector. that is used to store information that does not go into the
vmcb, but is SVM specific.

I tried to write some code to make -no-kvm-irqchip work too, but the first
level guest won't even boot with that atm, so I ditched it.

v2 moves the hflags to x86 generic code
v3 makes use of the new permission helper
v6 only enables interrupt_window if GIF=1

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/svm.c              | 47 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 45 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2998efe8927..29e4157732d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -259,6 +259,7 @@ struct kvm_vcpu_arch {
 	unsigned long cr3;
 	unsigned long cr4;
 	unsigned long cr8;
+	u32 hflags;
 	u64 pdptrs[4]; /* pae */
 	u64 shadow_efer;
 	u64 apic_base;
@@ -738,6 +739,8 @@ enum {
 	TASK_SWITCH_GATE = 3,
 };
 
+#define HF_GIF_MASK		(1 << 0)
+
 /*
  * Hardware virtualization extension instructions may fault if a
  * reboot turns off virtualization while processes are running.
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 87debdcd1b9..79cc06bfe57 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -251,7 +251,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	kvm_rip_write(vcpu, svm->next_rip);
 	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 
-	vcpu->arch.interrupt_window_open = 1;
+	vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static int has_svm(void)
@@ -600,6 +600,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 		save->cr4 = 0;
 	}
 	force_new_asid(&svm->vcpu);
+
+	svm->vcpu.arch.hflags = HF_GIF_MASK;
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -1234,6 +1236,36 @@ static int nested_svm_do(struct vcpu_svm *svm,
 	return retval;
 }
 
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (nested_svm_check_permissions(svm))
+		return 1;
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
+	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+	return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (nested_svm_check_permissions(svm))
+		return 1;
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
+	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+	/* After a CLGI no interrupts should come */
+	svm_clear_vintr(svm);
+	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+	return 1;
+}
+
 static int invalid_op_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
@@ -1535,8 +1567,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
 	[SVM_EXIT_VMLOAD]			= invalid_op_interception,
 	[SVM_EXIT_VMSAVE]			= invalid_op_interception,
-	[SVM_EXIT_STGI]				= invalid_op_interception,
-	[SVM_EXIT_CLGI]				= invalid_op_interception,
+	[SVM_EXIT_STGI]				= stgi_interception,
+	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= invalid_op_interception,
 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
@@ -1684,6 +1716,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	if (!kvm_cpu_has_interrupt(vcpu))
 		goto out;
 
+	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+		goto out;
+
 	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
 	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
 	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
@@ -1710,7 +1745,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 	}
 
 	svm->vcpu.arch.interrupt_window_open =
-		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		 (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1734,7 +1770,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	svm->vcpu.arch.interrupt_window_open =
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
 
 	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
 		/*
-- 
cgit v1.2.3


From b286d5d8b0836e76832dafcc5a18b0e8e5a3bc5e Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:05 +0100
Subject: KVM: SVM: Implement hsave

Implement the hsave MSR, that gives the VCPU a GPA to save the
old guest state in.

v2 allows userspace to save/restore hsave
v4 dummys out the hsave MSR, so we use a host page
v6 remembers the guest's hsave and exports the MSR

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/kvm_svm.h |  2 ++
 arch/x86/kvm/svm.c     | 13 +++++++++++++
 arch/x86/kvm/x86.c     |  2 +-
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f..a0877cac7b9 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -41,6 +41,8 @@ struct vcpu_svm {
 	unsigned long host_dr7;
 
 	u32 *msrpm;
+	struct vmcb *hsave;
+	u64 hsave_msr;
 };
 
 #endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 79cc06bfe57..59aaff1c959 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -626,6 +626,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	struct vcpu_svm *svm;
 	struct page *page;
 	struct page *msrpm_pages;
+	struct page *hsave_page;
 	int err;
 
 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -651,6 +652,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	svm->msrpm = page_address(msrpm_pages);
 	svm_vcpu_init_msrpm(svm->msrpm);
 
+	hsave_page = alloc_page(GFP_KERNEL);
+	if (!hsave_page)
+		goto uninit;
+	svm->hsave = page_address(hsave_page);
+
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
@@ -680,6 +686,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+	__free_page(virt_to_page(svm->hsave));
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -1377,6 +1384,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_IA32_LASTINTTOIP:
 		*data = svm->vmcb->save.last_excp_to;
 		break;
+	case MSR_VM_HSAVE_PA:
+		*data = svm->hsave_msr;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
 	}
@@ -1470,6 +1480,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		 */
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
 
+		break;
+	case MSR_VM_HSAVE_PA:
+		svm->hsave_msr = data;
 		break;
 	default:
 		return kvm_set_msr_common(vcpu, ecx, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae..99165a961f0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -456,7 +456,7 @@ static u32 msrs_to_save[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
-- 
cgit v1.2.3


From 5542675baa7e62ca4d18278c8758b6a4ec410639 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:06 +0100
Subject: KVM: SVM: Add VMLOAD and VMSAVE handlers

This implements the VMLOAD and VMSAVE instructions, that usually surround
the VMRUN instructions. Both instructions load / restore the same elements,
so we only need to implement them once.

v2 fixes CPL checking and replaces memcpy by assignments
v3 makes use of the new permission checking

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 59aaff1c959..a83c94eb577 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1243,6 +1243,62 @@ static int nested_svm_do(struct vcpu_svm *svm,
 	return retval;
 }
 
+static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+	to_vmcb->save.fs = from_vmcb->save.fs;
+	to_vmcb->save.gs = from_vmcb->save.gs;
+	to_vmcb->save.tr = from_vmcb->save.tr;
+	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
+	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
+	to_vmcb->save.star = from_vmcb->save.star;
+	to_vmcb->save.lstar = from_vmcb->save.lstar;
+	to_vmcb->save.cstar = from_vmcb->save.cstar;
+	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
+	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
+	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
+	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+
+	return 1;
+}
+
+static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
+			     void *arg2, void *opaque)
+{
+	return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
+}
+
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+			     void *arg2, void *opaque)
+{
+	return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
+}
+
+static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (nested_svm_check_permissions(svm))
+		return 1;
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
+	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+
+	return 1;
+}
+
+static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (nested_svm_check_permissions(svm))
+		return 1;
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
+	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+
+	return 1;
+}
+
 static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (nested_svm_check_permissions(svm))
@@ -1578,8 +1634,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
 	[SVM_EXIT_VMRUN]			= invalid_op_interception,
 	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
-	[SVM_EXIT_VMLOAD]			= invalid_op_interception,
-	[SVM_EXIT_VMSAVE]			= invalid_op_interception,
+	[SVM_EXIT_VMLOAD]			= vmload_interception,
+	[SVM_EXIT_VMSAVE]			= vmsave_interception,
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= invalid_op_interception,
-- 
cgit v1.2.3


From 3d6368ef580a4dff012960834bba4e28d3c1430c Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:07 +0100
Subject: KVM: SVM: Add VMRUN handler

This patch implements VMRUN. VMRUN enters a virtual CPU and runs that
in the same context as the normal guest CPU would run.
So basically it is implemented the same way, a normal CPU would do it.

We also prepare all intercepts that get OR'ed with the original
intercepts, as we do not allow a level 2 guest to be intercepted less
than the first level guest.

v2 implements the following improvements:

- fixes the CPL check
- does not allocate iopm when not used
- remembers the host's IF in the HIF bit in the hflags

v3:

- make use of the new permission checking
- add support for V_INTR_MASKING_MASK

v4:

- use host page backed hsave

v5:

- remove IOPM merging code

v6:

- save cr4 so PAE l1 guests work

v7:

- return 0 on vmrun so we check the MSRs too
- fix MSR check to use the correct variable

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/kvm/kvm_svm.h          |   8 ++
 arch/x86/kvm/svm.c              | 157 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 165 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 29e4157732d..53779309514 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -740,6 +740,8 @@ enum {
 };
 
 #define HF_GIF_MASK		(1 << 0)
+#define HF_HIF_MASK		(1 << 1)
+#define HF_VINTR_MASK		(1 << 2)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index a0877cac7b9..91673413d8f 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -43,6 +43,14 @@ struct vcpu_svm {
 	u32 *msrpm;
 	struct vmcb *hsave;
 	u64 hsave_msr;
+
+	u64 nested_vmcb;
+
+	/* These are the merged vectors */
+	u32 *nested_msrpm;
+
+	/* gpa pointers to the real vectors */
+	u64 nested_vmcb_msrpm;
 };
 
 #endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a83c94eb577..fad187cbfab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -77,6 +77,11 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+	return svm->nested_vmcb;
+}
+
 static unsigned long iopm_base;
 
 struct kvm_ldttss_desc {
@@ -601,6 +606,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	}
 	force_new_asid(&svm->vcpu);
 
+	svm->nested_vmcb = 0;
 	svm->vcpu.arch.hflags = HF_GIF_MASK;
 }
 
@@ -627,6 +633,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	struct page *page;
 	struct page *msrpm_pages;
 	struct page *hsave_page;
+	struct page *nested_msrpm_pages;
 	int err;
 
 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -649,6 +656,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
 	if (!msrpm_pages)
 		goto uninit;
+
+	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	if (!nested_msrpm_pages)
+		goto uninit;
+
 	svm->msrpm = page_address(msrpm_pages);
 	svm_vcpu_init_msrpm(svm->msrpm);
 
@@ -657,6 +669,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto uninit;
 	svm->hsave = page_address(hsave_page);
 
+	svm->nested_msrpm = page_address(nested_msrpm_pages);
+
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
@@ -687,6 +701,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->hsave));
+	__free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -1243,6 +1258,123 @@ static int nested_svm_do(struct vcpu_svm *svm,
 	return retval;
 }
 
+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+				  void *arg2, void *opaque)
+{
+	int i;
+	u32 *nested_msrpm = (u32*)arg1;
+	for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
+		svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+	return 0;
+}
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+			    void *arg2, void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	struct vmcb *hsave = svm->hsave;
+
+	/* nested_vmcb is our indicator if nested SVM is activated */
+	svm->nested_vmcb = svm->vmcb->save.rax;
+
+	/* Clear internal status */
+	svm->vcpu.arch.exception.pending = false;
+
+	/* Save the old vmcb, so we don't need to pick what we save, but
+	   can restore everything when a VMEXIT occurs */
+	memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+	/* We need to remember the original CR3 in the SPT case */
+	if (!npt_enabled)
+		hsave->save.cr3 = svm->vcpu.arch.cr3;
+	hsave->save.cr4 = svm->vcpu.arch.cr4;
+	hsave->save.rip = svm->next_rip;
+
+	if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+		svm->vcpu.arch.hflags |= HF_HIF_MASK;
+	else
+		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+	/* Load the nested guest state */
+	svm->vmcb->save.es = nested_vmcb->save.es;
+	svm->vmcb->save.cs = nested_vmcb->save.cs;
+	svm->vmcb->save.ss = nested_vmcb->save.ss;
+	svm->vmcb->save.ds = nested_vmcb->save.ds;
+	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+	svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+	if (npt_enabled) {
+		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+	} else {
+		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+		kvm_mmu_reset_context(&svm->vcpu);
+	}
+	svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+	/* In case we don't even reach vcpu_run, the fields are not updated */
+	svm->vmcb->save.rax = nested_vmcb->save.rax;
+	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+	svm->vmcb->save.rip = nested_vmcb->save.rip;
+	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+	svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+	/* We don't want a nested guest to be more powerful than the guest,
+	   so all intercepts are ORed */
+	svm->vmcb->control.intercept_cr_read |=
+		nested_vmcb->control.intercept_cr_read;
+	svm->vmcb->control.intercept_cr_write |=
+		nested_vmcb->control.intercept_cr_write;
+	svm->vmcb->control.intercept_dr_read |=
+		nested_vmcb->control.intercept_dr_read;
+	svm->vmcb->control.intercept_dr_write |=
+		nested_vmcb->control.intercept_dr_write;
+	svm->vmcb->control.intercept_exceptions |=
+		nested_vmcb->control.intercept_exceptions;
+
+	svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+	svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+	force_new_asid(&svm->vcpu);
+	svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+	svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+	if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+		nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+				nested_vmcb->control.int_ctl);
+	}
+	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+	else
+		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+	nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+			nested_vmcb->control.exit_int_info,
+			nested_vmcb->control.int_state);
+
+	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+	if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+		nsvm_printk("Injecting Event: 0x%x\n",
+				nested_vmcb->control.event_inj);
+	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+	svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+	return 0;
+}
+
 static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;
@@ -1299,6 +1431,26 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	nsvm_printk("VMrun\n");
+	if (nested_svm_check_permissions(svm))
+		return 1;
+
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
+	if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
+			  NULL, nested_svm_vmrun))
+		return 1;
+
+	if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
+		      NULL, nested_svm_vmrun_msrpm))
+		return 1;
+
+	return 1;
+}
+
 static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (nested_svm_check_permissions(svm))
@@ -1632,7 +1784,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_MSR]				= msr_interception,
 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
 	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
-	[SVM_EXIT_VMRUN]			= invalid_op_interception,
+	[SVM_EXIT_VMRUN]			= vmrun_interception,
 	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
 	[SVM_EXIT_VMLOAD]			= vmload_interception,
 	[SVM_EXIT_VMSAVE]			= vmsave_interception,
@@ -1939,7 +2091,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	svm->host_cr2 = kvm_read_cr2();
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
-	svm->vmcb->save.cr2 = vcpu->arch.cr2;
+	if (!is_nested(svm))
+		svm->vmcb->save.cr2 = vcpu->arch.cr2;
 	/* required for live migration with NPT */
 	if (npt_enabled)
 		svm->vmcb->save.cr3 = vcpu->arch.cr3;
-- 
cgit v1.2.3


From cf74a78b229d07f77416d2fe1f029f183a8a31df Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:08 +0100
Subject: KVM: SVM: Add VMEXIT handler and intercepts

This adds the #VMEXIT intercept, so we return to the level 1 guest
when something happens in the level 2 guest that should return to
the level 1 guest.

v2 implements HIF handling and cleans up exception interception
v3 adds support for V_INTR_MASKING_MASK
v4 uses the host page hsave
v5 removes IOPM merging code
v6 moves mmu code out of the atomic section

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 293 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fad187cbfab..4cb2920b152 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -72,6 +72,13 @@ module_param(npt, int, S_IRUGO);
 static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_vmexit(struct vcpu_svm *svm);
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+			     void *arg2, void *opaque);
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+				      bool has_error_code, u32 error_code);
+
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_svm, vcpu);
@@ -221,6 +228,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/* If we are within a nested VM we'd better #VMEXIT and let the
+	   guest handle the exception */
+	if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+		return;
+
 	svm->vmcb->control.event_inj = nr
 		| SVM_EVTINJ_VALID
 		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -1198,6 +1210,46 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
        return 0;
 }
 
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+				      bool has_error_code, u32 error_code)
+{
+	if (is_nested(svm)) {
+		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+		svm->vmcb->control.exit_code_hi = 0;
+		svm->vmcb->control.exit_info_1 = error_code;
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+		if (nested_svm_exit_handled(svm, false)) {
+			nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
+
+			nested_svm_vmexit(svm);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static inline int nested_svm_intr(struct vcpu_svm *svm)
+{
+	if (is_nested(svm)) {
+		if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+			return 0;
+
+		if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+			return 0;
+
+		svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+		if (nested_svm_exit_handled(svm, false)) {
+			nsvm_printk("VMexit -> INTR\n");
+			nested_svm_vmexit(svm);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
 {
 	struct page *page;
@@ -1258,6 +1310,228 @@ static int nested_svm_do(struct vcpu_svm *svm,
 	return retval;
 }
 
+static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
+					void *arg1,
+					void *arg2,
+					void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	bool kvm_overrides = *(bool *)opaque;
+	u32 exit_code = svm->vmcb->control.exit_code;
+
+	if (kvm_overrides) {
+		switch (exit_code) {
+		case SVM_EXIT_INTR:
+		case SVM_EXIT_NMI:
+			return 0;
+		/* For now we are always handling NPFs when using them */
+		case SVM_EXIT_NPF:
+			if (npt_enabled)
+				return 0;
+			break;
+		/* When we're shadowing, trap PFs */
+		case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+			if (!npt_enabled)
+				return 0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	switch (exit_code) {
+	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
+		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
+		if (nested_vmcb->control.intercept_cr_read & cr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
+		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
+		if (nested_vmcb->control.intercept_cr_write & cr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
+		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
+		if (nested_vmcb->control.intercept_dr_read & dr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
+		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
+		if (nested_vmcb->control.intercept_dr_write & dr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+		if (nested_vmcb->control.intercept_exceptions & excp_bits)
+			return 1;
+		break;
+	}
+	default: {
+		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+		nsvm_printk("exit code: 0x%x\n", exit_code);
+		if (nested_vmcb->control.intercept & exit_bits)
+			return 1;
+	}
+	}
+
+	return 0;
+}
+
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
+				       void *arg1, void *arg2,
+				       void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	u8 *msrpm = (u8 *)arg2;
+        u32 t0, t1;
+	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 param = svm->vmcb->control.exit_info_1 & 1;
+
+	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+		return 0;
+
+	switch(msr) {
+	case 0 ... 0x1fff:
+		t0 = (msr * 2) % 8;
+		t1 = msr / 8;
+		break;
+	case 0xc0000000 ... 0xc0001fff:
+		t0 = (8192 + msr - 0xc0000000) * 2;
+		t1 = (t0 / 8);
+		t0 %= 8;
+		break;
+	case 0xc0010000 ... 0xc0011fff:
+		t0 = (16384 + msr - 0xc0010000) * 2;
+		t1 = (t0 / 8);
+		t0 %= 8;
+		break;
+	default:
+		return 1;
+		break;
+	}
+	if (msrpm[t1] & ((1 << param) << t0))
+		return 1;
+
+	return 0;
+}
+
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+{
+	bool k = kvm_override;
+
+	switch (svm->vmcb->control.exit_code) {
+	case SVM_EXIT_MSR:
+		return nested_svm_do(svm, svm->nested_vmcb,
+				     svm->nested_vmcb_msrpm, NULL,
+				     nested_svm_exit_handled_msr);
+	default: break;
+	}
+
+	return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
+			     nested_svm_exit_handled_real);
+}
+
+static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
+				  void *arg2, void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	struct vmcb *hsave = svm->hsave;
+	u64 nested_save[] = { nested_vmcb->save.cr0,
+			      nested_vmcb->save.cr3,
+			      nested_vmcb->save.cr4,
+			      nested_vmcb->save.efer,
+			      nested_vmcb->control.intercept_cr_read,
+			      nested_vmcb->control.intercept_cr_write,
+			      nested_vmcb->control.intercept_dr_read,
+			      nested_vmcb->control.intercept_dr_write,
+			      nested_vmcb->control.intercept_exceptions,
+			      nested_vmcb->control.intercept,
+			      nested_vmcb->control.msrpm_base_pa,
+			      nested_vmcb->control.iopm_base_pa,
+			      nested_vmcb->control.tsc_offset };
+
+	/* Give the current vmcb to the guest */
+	memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
+	nested_vmcb->save.cr0 = nested_save[0];
+	if (!npt_enabled)
+		nested_vmcb->save.cr3 = nested_save[1];
+	nested_vmcb->save.cr4 = nested_save[2];
+	nested_vmcb->save.efer = nested_save[3];
+	nested_vmcb->control.intercept_cr_read = nested_save[4];
+	nested_vmcb->control.intercept_cr_write = nested_save[5];
+	nested_vmcb->control.intercept_dr_read = nested_save[6];
+	nested_vmcb->control.intercept_dr_write = nested_save[7];
+	nested_vmcb->control.intercept_exceptions = nested_save[8];
+	nested_vmcb->control.intercept = nested_save[9];
+	nested_vmcb->control.msrpm_base_pa = nested_save[10];
+	nested_vmcb->control.iopm_base_pa = nested_save[11];
+	nested_vmcb->control.tsc_offset = nested_save[12];
+
+	/* We always set V_INTR_MASKING and remember the old value in hflags */
+	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+	if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
+	    (nested_vmcb->control.int_vector)) {
+		nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
+				nested_vmcb->control.int_vector);
+	}
+
+	/* Restore the original control entries */
+	svm->vmcb->control = hsave->control;
+
+	/* Kill any pending exceptions */
+	if (svm->vcpu.arch.exception.pending == true)
+		nsvm_printk("WARNING: Pending Exception\n");
+	svm->vcpu.arch.exception.pending = false;
+
+	/* Restore selected save entries */
+	svm->vmcb->save.es = hsave->save.es;
+	svm->vmcb->save.cs = hsave->save.cs;
+	svm->vmcb->save.ss = hsave->save.ss;
+	svm->vmcb->save.ds = hsave->save.ds;
+	svm->vmcb->save.gdtr = hsave->save.gdtr;
+	svm->vmcb->save.idtr = hsave->save.idtr;
+	svm->vmcb->save.rflags = hsave->save.rflags;
+	svm_set_efer(&svm->vcpu, hsave->save.efer);
+	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
+	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+	if (npt_enabled) {
+		svm->vmcb->save.cr3 = hsave->save.cr3;
+		svm->vcpu.arch.cr3 = hsave->save.cr3;
+	} else {
+		kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+	}
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
+	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+	svm->vmcb->save.dr7 = 0;
+	svm->vmcb->save.cpl = 0;
+	svm->vmcb->control.exit_int_info = 0;
+
+	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+	/* Exit nested SVM mode */
+	svm->nested_vmcb = 0;
+
+	return 0;
+}
+
+static int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+	nsvm_printk("VMexit\n");
+	if (nested_svm_do(svm, svm->nested_vmcb, 0,
+			  NULL, nested_svm_vmexit_real))
+		return 1;
+
+	kvm_mmu_reset_context(&svm->vcpu);
+	kvm_mmu_load(&svm->vcpu);
+
+	return 0;
+}
 
 static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
 				  void *arg2, void *opaque)
@@ -1805,6 +2079,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
 		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
 
+	if (is_nested(svm)) {
+		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
+			    exit_code, svm->vmcb->control.exit_info_1,
+			    svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+		if (nested_svm_exit_handled(svm, true)) {
+			nested_svm_vmexit(svm);
+			nsvm_printk("-> #VMEXIT\n");
+			return 1;
+		}
+	}
+
 	if (npt_enabled) {
 		int mmu_reload = 0;
 		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1892,6 +2177,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	nested_svm_intr(svm);
+
 	svm_inject_irq(svm, irq);
 }
 
@@ -1937,6 +2224,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	if (!kvm_cpu_has_interrupt(vcpu))
 		goto out;
 
+	if (nested_svm_intr(svm))
+		goto out;
+
 	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
 		goto out;
 
@@ -1989,6 +2279,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb_control_area *control = &svm->vmcb->control;
 
+	if (nested_svm_intr(svm))
+		return;
+
 	svm->vcpu.arch.interrupt_window_open =
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
-- 
cgit v1.2.3


From eb6f302edff9da963e687bf6c15dcf88843b1c3b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 25 Nov 2008 20:17:09 +0100
Subject: KVM: SVM: Allow read access to MSR_VM_VR

KVM tries to read the VM_CR MSR to find out if SVM was disabled by
the BIOS. So implement read support for this MSR to make nested
SVM running.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4cb2920b152..df5b4119266 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1869,6 +1869,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_VM_HSAVE_PA:
 		*data = svm->hsave_msr;
 		break;
+	case MSR_VM_CR:
+		*data = 0;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
 	}
-- 
cgit v1.2.3


From 236de05553a7ef8f6940de8686ae9bf1272cd2cf Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:10 +0100
Subject: KVM: SVM: Allow setting the SVME bit

Normally setting the SVME bit in EFER is not allowed, as we did
not support SVM. Not since we do, we should also allow enabling
SVM mode.

v2 comes as last patch, so we don't enable half-ready code
v4 introduces a module option to enable SVM
v6 warns that nesting is enabled

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index df5b4119266..0fbbde54eca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -69,6 +69,9 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
+static int nested = 0;
+module_param(nested, int, S_IRUGO);
+
 static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
@@ -443,6 +446,11 @@ static __init int svm_hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	if (nested) {
+		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+		kvm_enable_efer_bits(EFER_SVME);
+	}
+
 	for_each_online_cpu(cpu) {
 		r = svm_cpu_init(cpu);
 		if (r)
-- 
cgit v1.2.3


From d80174745ba3938bc6abb8f95ed670ac0631a182 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 25 Nov 2008 20:17:11 +0100
Subject: KVM: SVM: Only allow setting of EFER_SVME when CPUID SVM is set

Userspace has to tell the kernel module somehow that nested SVM should be used.
The easiest way that doesn't break anything I could think of is to implement

if (cpuid & svm)
    allow write to efer
else
    deny write to efer

Old userspaces mask the SVM capability bit, so they don't break.
In order to find out that the SVM capability is set, I had to split the
kvm_emulate_cpuid into a finding and an emulating part.

(introduced in v6)

Acked-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 99165a961f0..b5e9932e0f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -69,6 +69,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +175,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 			   u32 error_code)
 {
 	++vcpu->stat.pf_guest;
+
 	if (vcpu->arch.exception.pending) {
 		if (vcpu->arch.exception.nr == PF_VECTOR) {
 			printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -442,6 +445,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -481,6 +489,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		return;
 	}
 
+	if (efer & EFER_SVME) {
+		struct kvm_cpuid_entry2 *feat;
+
+		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+			printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+			kvm_inject_gp(vcpu, 0);
+			return;
+		}
+	}
+
 	kvm_x86_ops->set_efer(vcpu, efer);
 
 	efer &= ~EFER_LMA;
@@ -1181,11 +1200,6 @@ out:
 	return r;
 }
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			  u32 index)
 {
@@ -1228,7 +1242,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word3_x86_features =
 		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
 	const u32 kvm_supported_word6_x86_features =
-		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+		bit(X86_FEATURE_SVM);
 
 	/* all func 2 cpuid_count() should be called on the same cpu */
 	get_cpu();
@@ -2832,20 +2847,15 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
 	return 1;
 }
 
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index)
 {
 	int i;
-	u32 function, index;
-	struct kvm_cpuid_entry2 *e, *best;
+	struct kvm_cpuid_entry2 *best = NULL;
 
-	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
-	best = NULL;
 	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+		struct kvm_cpuid_entry2 *e;
+
 		e = &vcpu->arch.cpuid_entries[i];
 		if (is_matching_cpuid_entry(e, function, index)) {
 			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +2870,22 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 			if (!best || e->function > best->function)
 				best = e;
 	}
+
+	return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+	u32 function, index;
+	struct kvm_cpuid_entry2 *best;
+
+	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+	best = kvm_find_cpuid_entry(vcpu, function, index);
 	if (best) {
 		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
 		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
-- 
cgit v1.2.3


From 8ab2d2e231062814bd89bba2d6d92563190aa2bb Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: VMX: Support for injecting software exceptions

VMX differentiates between processor and software generated exceptions
when injecting them into the guest. Extend vmx_queue_exception
accordingly (and refactor related constants) so that we can use this
service reliably for the new guest debugging framework.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/vmx.h |  3 ++-
 arch/x86/kvm/vmx.c         | 35 ++++++++++++++++++++---------------
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d..32159f034ef 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
 #define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION	(3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION	(6 << 8) /* software exception */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI		0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af57682..1d974c1eaa7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -189,21 +189,21 @@ static inline int is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_no_device(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_invalid_opcode(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_external_interrupt(u32 intr_info)
@@ -747,29 +747,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-	if (has_error_code)
+	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+	}
 
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = nr;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-		if (nr == BP_VECTOR)
+		if (nr == BP_VECTOR || nr == OF_VECTOR)
 			vmx->rmode.irq.rip++;
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			     nr | INTR_TYPE_SOFT_INTR
-			     | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
-			     | INTR_INFO_VALID_MASK);
+		intr_info |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
 
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-		     nr | INTR_TYPE_EXCEPTION
-		     | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
-		     | INTR_INFO_VALID_MASK);
+	if (nr == BP_VECTOR || nr == OF_VECTOR) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+	} else
+		intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
 static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -2650,7 +2654,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
-	    (INTR_TYPE_EXCEPTION | 1)) {
+	    (INTR_TYPE_HARD_EXCEPTION | 1)) {
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		return 0;
 	}
@@ -3238,7 +3242,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 			vmx->vcpu.arch.nmi_injected = false;
 	}
 	kvm_clear_exception_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+	if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+				type == INTR_TYPE_SOFT_EXCEPTION)) {
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, error);
-- 
cgit v1.2.3


From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: New guest debug interface

This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL
instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic
part, controlling the "main switch" and the single-step feature. The
arch specific part adds an x86 interface for intercepting both types of
debug exceptions separately and re-injecting them when the host was not
interested. Moveover, the foundation for guest debugging via debug
registers is layed.

To signal breakpoint events properly back to userland, an arch-specific
data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block
contains the PC, the debug exception, and relevant debug registers to
tell debug events properly apart.

The availability of this new interface is signaled by
KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are
provided.

Note that both SVM and VTX are supported, but only the latter was tested
yet. Based on the experience with all those VTX corner case, I would be
fairly surprised if SVM will work out of the box.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h     |  7 ++++
 arch/ia64/kvm/kvm-ia64.c        |  4 +-
 arch/powerpc/include/asm/kvm.h  |  7 ++++
 arch/powerpc/kvm/powerpc.c      |  4 +-
 arch/s390/include/asm/kvm.h     |  7 ++++
 arch/s390/kvm/kvm-s390.c        |  4 +-
 arch/x86/include/asm/kvm.h      | 18 ++++++++
 arch/x86/include/asm/kvm_host.h |  9 +---
 arch/x86/kvm/svm.c              | 50 +++++++++++++++++++++-
 arch/x86/kvm/vmx.c              | 93 ++++++++++++++++-------------------------
 arch/x86/kvm/x86.c              | 14 ++++---
 11 files changed, 138 insertions(+), 79 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6af7c..be3fdb89121 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -214,4 +214,11 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 28f982045f2..de47467a0e6 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1303,8 +1303,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-		struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	return -EINVAL;
 }
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5..755f1b1948c 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -52,4 +52,11 @@ struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5f81256287f..7c2ad4017d6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -240,8 +240,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_core_vcpu_put(vcpu);
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-                                    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	int i;
 
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3a..0b2f829f6d5 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,4 +42,11 @@ struct kvm_fpu {
 	__u64 fprs[16];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d33893e1e8..cbfe91e1012 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-				    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	return -EINVAL; /* not implemented yet */
 }
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec4..32eb96c7ca2 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -212,6 +212,24 @@ struct kvm_pit_channel_state {
 	__s64 count_load_time;
 };
 
+struct kvm_debug_exit_arch {
+	__u32 exception;
+	__u32 pad;
+	__u64 pc;
+	__u64 dr6;
+	__u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+#define KVM_GUESTDBG_INJECT_DB		0x00040000
+#define KVM_GUESTDBG_INJECT_BP		0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+	__u64 debugreg[8];
+};
+
 struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53779309514..c430cd580ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -135,12 +135,6 @@ enum {
 
 #define KVM_NR_MEM_OBJS 40
 
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
-
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -448,8 +442,7 @@ struct kvm_x86_ops {
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
 	int (*set_guest_debug)(struct kvm_vcpu *vcpu,
-			       struct kvm_debug_guest *dbg);
-	void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+			       struct kvm_guest_debug *dbg);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0fbbde54eca..88d9062f454 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -968,9 +968,32 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	return -EOPNOTSUPP;
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	svm->vmcb->control.intercept_exceptions &=
+		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			svm->vmcb->control.intercept_exceptions |=
+				1 << DB_VECTOR;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			svm->vmcb->control.intercept_exceptions |=
+				1 << BP_VECTOR;
+	} else
+		vcpu->guest_debug = 0;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+		svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+	return 0;
 }
 
 static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -1094,6 +1117,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	if (!(svm->vcpu.guest_debug &
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+		return 1;
+	}
+	kvm_run->exit_reason = KVM_EXIT_DEBUG;
+	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+	kvm_run->debug.arch.exception = DB_VECTOR;
+	return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_DEBUG;
+	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+	kvm_run->debug.arch.exception = BP_VECTOR;
+	return 0;
+}
+
 static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	int er;
@@ -2050,6 +2094,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
+	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
+	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
 	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d974c1eaa7..f55690ddb3a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -480,8 +480,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
-	if (vcpu->guest_debug.enabled)
-		eb |= 1u << DB_VECTOR;
+	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			eb |= 1u << DB_VECTOR;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			eb |= 1u << BP_VECTOR;
+	}
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
 	if (vm_need_ept())
@@ -1003,40 +1008,23 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 {
-	unsigned long dr7 = 0x400;
-	int old_singlestep;
-
-	old_singlestep = vcpu->guest_debug.singlestep;
-
-	vcpu->guest_debug.enabled = dbg->enabled;
-	if (vcpu->guest_debug.enabled) {
-		int i;
-
-		dr7 |= 0x200;  /* exact */
-		for (i = 0; i < 4; ++i) {
-			if (!dbg->breakpoints[i].enabled)
-				continue;
-			vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
-			dr7 |= 2 << (i*2);    /* global enable */
-			dr7 |= 0 << (i*4+16); /* execution breakpoint */
-		}
-
-		vcpu->guest_debug.singlestep = dbg->singlestep;
-	} else
-		vcpu->guest_debug.singlestep = 0;
+	int old_debug = vcpu->guest_debug;
+	unsigned long flags;
 
-	if (old_singlestep && !vcpu->guest_debug.singlestep) {
-		unsigned long flags;
+	vcpu->guest_debug = dbg->control;
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+		vcpu->guest_debug = 0;
 
-		flags = vmcs_readl(GUEST_RFLAGS);
+	flags = vmcs_readl(GUEST_RFLAGS);
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
 		flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-		vmcs_writel(GUEST_RFLAGS, flags);
-	}
+	vmcs_writel(GUEST_RFLAGS, flags);
 
 	update_exception_bitmap(vcpu);
-	vmcs_writel(GUEST_DR7, dr7);
 
 	return 0;
 }
@@ -2540,24 +2528,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
-	set_debugreg(dbg->bp[0], 0);
-	set_debugreg(dbg->bp[1], 1);
-	set_debugreg(dbg->bp[2], 2);
-	set_debugreg(dbg->bp[3], 3);
-
-	if (dbg->singlestep) {
-		unsigned long flags;
-
-		flags = vmcs_readl(GUEST_RFLAGS);
-		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-		vmcs_writel(GUEST_RFLAGS, flags);
-	}
-}
-
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
@@ -2574,9 +2544,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 *        the required debugging infrastructure rework.
 	 */
 	switch (vec) {
-	case DE_VECTOR:
 	case DB_VECTOR:
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			return 0;
+		kvm_queue_exception(vcpu, vec);
+		return 1;
 	case BP_VECTOR:
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			return 0;
+		/* fall through */
+	case DE_VECTOR:
 	case OF_VECTOR:
 	case BR_VECTOR:
 	case UD_VECTOR:
@@ -2593,7 +2571,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info, error_code;
+	u32 intr_info, ex_no, error_code;
 	unsigned long cr2, rip;
 	u32 vect_info;
 	enum emulation_result er;
@@ -2653,14 +2631,16 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
-	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
-	    (INTR_TYPE_HARD_EXCEPTION | 1)) {
+	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+	if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) {
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		return 0;
+		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+		kvm_run->debug.arch.exception = ex_no;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+		kvm_run->ex.exception = ex_no;
+		kvm_run->ex.error_code = error_code;
 	}
-	kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-	kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
-	kvm_run->ex.error_code = error_code;
 	return 0;
 }
 
@@ -3600,7 +3580,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_put = vmx_vcpu_put,
 
 	.set_guest_debug = set_guest_debug,
-	.guest_debug_pre = kvm_guest_debug_pre,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5e9932e0f6..e990d164b56 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3005,9 +3005,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
-	if (vcpu->guest_debug.enabled)
-		kvm_x86_ops->guest_debug_pre(vcpu);
-
 	vcpu->guest_mode = 1;
 	/*
 	 * Make sure that guest_mode assignment won't happen after
@@ -3218,7 +3215,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	/*
 	 * Don't leak debug flags in case they were set for guest debugging
 	 */
-	if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
 
 	vcpu_put(vcpu);
@@ -3837,8 +3834,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-				    struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	int r;
 
@@ -3846,6 +3843,11 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
 
 	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
 
+	if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+		kvm_queue_exception(vcpu, DB_VECTOR);
+	else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+		kvm_queue_exception(vcpu, BP_VECTOR);
+
 	vcpu_put(vcpu);
 
 	return r;
-- 
cgit v1.2.3


From 55934c0bd3bb232a9cf902820dd63ad18ed65e49 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: VMX: Allow single-stepping when uninterruptible

When single-stepping over STI and MOV SS, we must clear the
corresponding interruptibility bits in the guest state. Otherwise
vmentry fails as it then expects bit 14 (BS) in pending debug exceptions
being set, but that's not correct for the guest debugging case.

Note that clearing those bits is safe as we check for interruptibility
based on the original state and do not inject interrupts or NMIs if
guest interruptibility was blocked.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f55690ddb3a..c776868ffe4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2478,6 +2478,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 {
 	vmx_update_window_states(vcpu);
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS);
+
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 		if (vcpu->arch.interrupt.pending) {
 			enable_nmi_window(vcpu);
@@ -3244,6 +3249,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
 	vmx_update_window_states(vcpu);
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS);
+
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 		if (vcpu->arch.interrupt.pending) {
 			enable_nmi_window(vcpu);
-- 
cgit v1.2.3


From 42dbaa5a057736bf8b5c22aa42dbe975bf1080e5 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: x86: Virtualize debug registers

So far KVM only had basic x86 debug register support, once introduced to
realize guest debugging that way. The guest itself was not able to use
those registers.

This patch now adds (almost) full support for guest self-debugging via
hardware registers. It refactors the code, moving generic parts out of
SVM (VMX was already cleaned up by the KVM_SET_GUEST_DEBUG patches), and
it ensures that the registers are properly switched between host and
guest.

This patch also prepares debug register usage by the host. The latter
will (once wired-up by the following patch) allow for hardware
breakpoints/watchpoints in guest code. If this is enabled, the guest
will only see faked debug registers without functionality, but with
content reflecting the guest's modifications.

Tested on Intel only, but SVM /should/ work as well, but who knows...

Known limitations: Trapping on tss switch won't work - most probably on
Intel.

Credits also go to Joerg Roedel - I used his once posted debugging
series as platform for this patch.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  22 ++++++++
 arch/x86/include/asm/vmx.h      |   2 +-
 arch/x86/kvm/kvm_svm.h          |   6 ---
 arch/x86/kvm/svm.c              | 116 +++++++++++++++-------------------------
 arch/x86/kvm/vmx.c              | 114 +++++++++++++++++++++++++++++++++------
 arch/x86/kvm/x86.c              |  29 ++++++++++
 6 files changed, 193 insertions(+), 96 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c430cd580ee..0a4dab25a91 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -135,6 +135,19 @@ enum {
 
 #define KVM_NR_MEM_OBJS 40
 
+#define KVM_NR_DB_REGS	4
+
+#define DR6_BD		(1 << 13)
+#define DR6_BS		(1 << 14)
+#define DR6_FIXED_1	0xffff0ff0
+#define DR6_VOLATILE	0x0000e00f
+
+#define DR7_BP_EN_MASK	0x000000ff
+#define DR7_GE		(1 << 9)
+#define DR7_GD		(1 << 13)
+#define DR7_FIXED_1	0x00000400
+#define DR7_VOLATILE	0xffff23ff
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -334,6 +347,15 @@ struct kvm_vcpu_arch {
 
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
+
+	int switch_db_regs;
+	unsigned long host_db[KVM_NR_DB_REGS];
+	unsigned long host_dr6;
+	unsigned long host_dr7;
+	unsigned long db[KVM_NR_DB_REGS];
+	unsigned long dr6;
+	unsigned long dr7;
+	unsigned long eff_db[KVM_NR_DB_REGS];
 };
 
 struct kvm_mem_alias {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 32159f034ef..498f944010b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -312,7 +312,7 @@ enum vmcs_field {
 #define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
 #define TYPE_MOV_TO_DR                  (0 << 4)
 #define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq)        (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
 
 
 /* segment AR */
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 91673413d8f..ed66e4c078d 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
 };
 
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
 
 struct kvm_vcpu;
 
@@ -29,16 +28,11 @@ struct vcpu_svm {
 	struct svm_cpu_data *svm_data;
 	uint64_t asid_generation;
 
-	unsigned long db_regs[NUM_DB_REGS];
-
 	u64 next_rip;
 
 	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
 	u64 host_gs_base;
 	unsigned long host_cr2;
-	unsigned long host_db_regs[NUM_DB_REGS];
-	unsigned long host_dr6;
-	unsigned long host_dr7;
 
 	u32 *msrpm;
 	struct vmcb *hsave;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 88d9062f454..815f50e425a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
 #define IOPM_ALLOC_ORDER 2
 #define MSRPM_ALLOC_ORDER 1
 
-#define DR7_GD_MASK (1 << 13)
-#define DR6_BD_MASK (1 << 13)
-
 #define SEG_TYPE_LDT 2
 #define SEG_TYPE_BUSY_TSS16 3
 
@@ -181,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
 	asm volatile ("mov %0, %%cr2" :: "r" (val));
 }
 
-static inline unsigned long read_dr6(void)
-{
-	unsigned long dr6;
-
-	asm volatile ("mov %%dr6, %0" : "=r" (dr6));
-	return dr6;
-}
-
-static inline void write_dr6(unsigned long val)
-{
-	asm volatile ("mov %0, %%dr6" :: "r" (val));
-}
-
-static inline unsigned long read_dr7(void)
-{
-	unsigned long dr7;
-
-	asm volatile ("mov %%dr7, %0" : "=r" (dr7));
-	return dr7;
-}
-
-static inline void write_dr7(unsigned long val)
-{
-	asm volatile ("mov %0, %%dr7" :: "r" (val));
-}
-
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
 {
 	to_svm(vcpu)->asid_generation--;
@@ -695,7 +666,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
-	memset(svm->db_regs, 0, sizeof(svm->db_regs));
 	init_vmcb(svm);
 
 	fx_init(&svm->vcpu);
@@ -1035,7 +1005,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
-	unsigned long val = to_svm(vcpu)->db_regs[dr];
+	struct vcpu_svm *svm = to_svm(vcpu);
+	unsigned long val;
+
+	switch (dr) {
+	case 0 ... 3:
+		val = vcpu->arch.db[dr];
+		break;
+	case 6:
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+			val = vcpu->arch.dr6;
+		else
+			val = svm->vmcb->save.dr6;
+		break;
+	case 7:
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+			val = vcpu->arch.dr7;
+		else
+			val = svm->vmcb->save.dr7;
+		break;
+	default:
+		val = 0;
+	}
+
 	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	return val;
 }
@@ -1045,33 +1037,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	*exception = 0;
+	KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
 
-	if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
-		svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
-		svm->vmcb->save.dr6 |= DR6_BD_MASK;
-		*exception = DB_VECTOR;
-		return;
-	}
+	*exception = 0;
 
 	switch (dr) {
 	case 0 ... 3:
-		svm->db_regs[dr] = value;
+		vcpu->arch.db[dr] = value;
+		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+			vcpu->arch.eff_db[dr] = value;
 		return;
 	case 4 ... 5:
-		if (vcpu->arch.cr4 & X86_CR4_DE) {
+		if (vcpu->arch.cr4 & X86_CR4_DE)
 			*exception = UD_VECTOR;
+		return;
+	case 6:
+		if (value & 0xffffffff00000000ULL) {
+			*exception = GP_VECTOR;
 			return;
 		}
-	case 7: {
-		if (value & ~((1ULL << 32) - 1)) {
+		vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
+		return;
+	case 7:
+		if (value & 0xffffffff00000000ULL) {
 			*exception = GP_VECTOR;
 			return;
 		}
-		svm->vmcb->save.dr7 = value;
+		vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
+		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+			svm->vmcb->save.dr7 = vcpu->arch.dr7;
+			vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
+		}
 		return;
-	}
 	default:
+		/* FIXME: Possible case? */
 		printk(KERN_DEBUG "%s: unexpected dr %u\n",
 		       __func__, dr);
 		*exception = UD_VECTOR;
@@ -2365,22 +2364,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
-static void save_db_regs(unsigned long *db_regs)
-{
-	asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
-	asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
-	asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
-	asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
-}
-
-static void load_db_regs(unsigned long *db_regs)
-{
-	asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
-	asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
-	asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
-	asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
-}
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	force_new_asid(vcpu);
@@ -2439,20 +2422,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	gs_selector = kvm_read_gs();
 	ldt_selector = kvm_read_ldt();
 	svm->host_cr2 = kvm_read_cr2();
-	svm->host_dr6 = read_dr6();
-	svm->host_dr7 = read_dr7();
 	if (!is_nested(svm))
 		svm->vmcb->save.cr2 = vcpu->arch.cr2;
 	/* required for live migration with NPT */
 	if (npt_enabled)
 		svm->vmcb->save.cr3 = vcpu->arch.cr3;
 
-	if (svm->vmcb->save.dr7 & 0xff) {
-		write_dr7(0);
-		save_db_regs(svm->host_db_regs);
-		load_db_regs(svm->db_regs);
-	}
-
 	clgi();
 
 	local_irq_enable();
@@ -2528,16 +2503,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
 		);
 
-	if ((svm->vmcb->save.dr7 & 0xff))
-		load_db_regs(svm->host_db_regs);
-
 	vcpu->arch.cr2 = svm->vmcb->save.cr2;
 	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
-	write_dr6(svm->host_dr6);
-	write_dr7(svm->host_dr7);
 	kvm_write_cr2(svm->host_cr2);
 
 	kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c776868ffe4..0989776ee7b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2311,7 +2311,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
 	vmcs_writel(GUEST_DR7, 0x400);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2577,7 +2576,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info, ex_no, error_code;
-	unsigned long cr2, rip;
+	unsigned long cr2, rip, dr6;
 	u32 vect_info;
 	enum emulation_result er;
 
@@ -2637,14 +2636,28 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
-	if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) {
+	switch (ex_no) {
+	case DB_VECTOR:
+		dr6 = vmcs_readl(EXIT_QUALIFICATION);
+		if (!(vcpu->guest_debug &
+		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+			vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+			kvm_queue_exception(vcpu, DB_VECTOR);
+			return 1;
+		}
+		kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+		/* fall through */
+	case BP_VECTOR:
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
-	} else {
+		break;
+	default:
 		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
 		kvm_run->ex.exception = ex_no;
 		kvm_run->ex.error_code = error_code;
+		break;
 	}
 	return 0;
 }
@@ -2784,21 +2797,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	unsigned long val;
 	int dr, reg;
 
-	/*
-	 * FIXME: this code assumes the host is debugging the guest.
-	 *        need to deal with guest debugging itself too.
-	 */
+	dr = vmcs_readl(GUEST_DR7);
+	if (dr & DR7_GD) {
+		/*
+		 * As the vm-exit takes precedence over the debug trap, we
+		 * need to emulate the latter, either for the host or the
+		 * guest debugging itself.
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+			kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+			kvm_run->debug.arch.dr7 = dr;
+			kvm_run->debug.arch.pc =
+				vmcs_readl(GUEST_CS_BASE) +
+				vmcs_readl(GUEST_RIP);
+			kvm_run->debug.arch.exception = DB_VECTOR;
+			kvm_run->exit_reason = KVM_EXIT_DEBUG;
+			return 0;
+		} else {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 |= DR6_BD;
+			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+			kvm_queue_exception(vcpu, DB_VECTOR);
+			return 1;
+		}
+	}
+
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & 7;
-	reg = (exit_qualification >> 8) & 15;
-	if (exit_qualification & 16) {
-		/* mov from dr */
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		switch (dr) {
+		case 0 ... 3:
+			val = vcpu->arch.db[dr];
+			break;
 		case 6:
-			val = 0xffff0ff0;
+			val = vcpu->arch.dr6;
 			break;
 		case 7:
-			val = 0x400;
+			val = vcpu->arch.dr7;
 			break;
 		default:
 			val = 0;
@@ -2806,7 +2842,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_register_write(vcpu, reg, val);
 		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
-		/* mov to dr */
+		val = vcpu->arch.regs[reg];
+		switch (dr) {
+		case 0 ... 3:
+			vcpu->arch.db[dr] = val;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+				vcpu->arch.eff_db[dr] = val;
+			break;
+		case 4 ... 5:
+			if (vcpu->arch.cr4 & X86_CR4_DE)
+				kvm_queue_exception(vcpu, UD_VECTOR);
+			break;
+		case 6:
+			if (val & 0xffffffff00000000ULL) {
+				kvm_queue_exception(vcpu, GP_VECTOR);
+				break;
+			}
+			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+			break;
+		case 7:
+			if (val & 0xffffffff00000000ULL) {
+				kvm_queue_exception(vcpu, GP_VECTOR);
+				break;
+			}
+			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+				vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+				vcpu->arch.switch_db_regs =
+					(val & DR7_BP_EN_MASK);
+			}
+			break;
+		}
+		KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
 	}
 	skip_emulated_instruction(vcpu);
 	return 1;
@@ -2957,7 +3024,18 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 	tss_selector = exit_qualification;
 
-	return kvm_task_switch(vcpu, tss_selector, reason);
+	if (!kvm_task_switch(vcpu, tss_selector, reason))
+		return 0;
+
+	/* clear all local breakpoint enable flags */
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+
+	/*
+	 * TODO: What about debug traps on tss switch?
+	 *       Are we supposed to inject them and update dr6?
+	 */
+
+	return 1;
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3342,6 +3420,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
+	set_debugreg(vcpu->arch.dr6, 6);
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3436,6 +3516,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
 	vcpu->arch.regs_dirty = 0;
 
+	get_debugreg(vcpu->arch.dr6, 6);
+
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e990d164b56..300bc4d42ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3025,10 +3025,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	kvm_guest_enter();
 
+	get_debugreg(vcpu->arch.host_dr6, 6);
+	get_debugreg(vcpu->arch.host_dr7, 7);
+	if (unlikely(vcpu->arch.switch_db_regs)) {
+		get_debugreg(vcpu->arch.host_db[0], 0);
+		get_debugreg(vcpu->arch.host_db[1], 1);
+		get_debugreg(vcpu->arch.host_db[2], 2);
+		get_debugreg(vcpu->arch.host_db[3], 3);
+
+		set_debugreg(0, 7);
+		set_debugreg(vcpu->arch.eff_db[0], 0);
+		set_debugreg(vcpu->arch.eff_db[1], 1);
+		set_debugreg(vcpu->arch.eff_db[2], 2);
+		set_debugreg(vcpu->arch.eff_db[3], 3);
+	}
 
 	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
+	if (unlikely(vcpu->arch.switch_db_regs)) {
+		set_debugreg(0, 7);
+		set_debugreg(vcpu->arch.host_db[0], 0);
+		set_debugreg(vcpu->arch.host_db[1], 1);
+		set_debugreg(vcpu->arch.host_db[2], 2);
+		set_debugreg(vcpu->arch.host_db[3], 3);
+	}
+	set_debugreg(vcpu->arch.host_dr6, 6);
+	set_debugreg(vcpu->arch.host_dr7, 7);
+
 	vcpu->guest_mode = 0;
 	local_irq_enable();
 
@@ -4035,6 +4059,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.nmi_pending = false;
 	vcpu->arch.nmi_injected = false;
 
+	vcpu->arch.switch_db_regs = 0;
+	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+	vcpu->arch.dr6 = DR6_FIXED_1;
+	vcpu->arch.dr7 = DR7_FIXED_1;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-- 
cgit v1.2.3


From ae675ef01cd86014acf8da5dee87876b71122495 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 15 Dec 2008 13:52:10 +0100
Subject: KVM: x86: Wire-up hardware breakpoints for guest debugging

Add the remaining bits to make use of debug registers also for guest
debugging, thus enabling the use of hardware breakpoints and
watchpoints.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c |  5 +++++
 arch/x86/kvm/vmx.c |  5 +++++
 arch/x86/kvm/x86.c | 14 +++++++++++++-
 3 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 815f50e425a..b3a7a314d55 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -958,6 +958,11 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	} else
 		vcpu->guest_debug = 0;
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
+	else
+		svm->vmcb->save.dr7 = vcpu->arch.dr7;
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
 	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0989776ee7b..cee81c9a665 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1017,6 +1017,11 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
 		vcpu->guest_debug = 0;
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
+	else
+		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+
 	flags = vmcs_readl(GUEST_RFLAGS);
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 300bc4d42ab..2477e87b2f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3861,10 +3861,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
-	int r;
+	int i, r;
 
 	vcpu_load(vcpu);
 
+	if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+		for (i = 0; i < KVM_NR_DB_REGS; ++i)
+			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+		vcpu->arch.switch_db_regs =
+			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+	} else {
+		for (i = 0; i < KVM_NR_DB_REGS; i++)
+			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+		vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+	}
+
 	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
 
 	if (dbg->control & KVM_GUESTDBG_INJECT_DB)
-- 
cgit v1.2.3


From e9a999fe1feaddb71bffbacbbd68e0da8ca8b50b Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 18 Dec 2008 12:17:51 +0100
Subject: KVM: ia64: stack get/restore patch

Implement KVM_IA64_VCPU_[GS]ET_STACK ioctl calls. This is required
for live migrations.

Patch is based on previous implementation that was part of old
GET/SET_REGS ioctl calls.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h      |  7 +++
 arch/ia64/include/asm/kvm_host.h |  6 ++-
 arch/ia64/kvm/kvm-ia64.c         | 92 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index be3fdb89121..b5145784233 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -214,6 +214,13 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+#define KVM_IA64_VCPU_STACK_SHIFT	16
+#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
 struct kvm_debug_exit_arch {
 };
 
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 34866366165..7da0c096322 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -112,7 +112,11 @@
 #define VCPU_STRUCT_SHIFT	16
 #define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
 
-#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT		16
+#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
 
 #define KVM_VM_STRUCT_SHIFT	19
 #define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index de47467a0e6..1477f91617a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1421,6 +1421,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+	return 0;
+}
+
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 
@@ -1430,9 +1447,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			 unsigned int ioctl, unsigned long arg)
 {
-	return -EINVAL;
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_ia64_vcpu_stack *stack = NULL;
+	long r;
+
+	switch (ioctl) {
+	case KVM_IA64_VCPU_GET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_WRITE, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+			       "Illegal user destination address for stack\n");
+			goto out;
+		}
+		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+		if (r)
+			goto out;
+
+		if (copy_to_user(user_stack, stack,
+				 sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		break;
+	}
+	case KVM_IA64_VCPU_SET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_READ, user_stack,
+			    sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+			       "Illegal user address for stack\n");
+			goto out;
+		}
+		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(stack, user_stack,
+				   sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+		break;
+	}
+
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	kfree(stack);
+	return r;
 }
 
 int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1472,7 +1558,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			unsigned int ioctl, unsigned long arg)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 989c0f0ed56468a4aa48711cef5acf122a40d1dd Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Thu, 18 Dec 2008 12:33:18 +0100
Subject: KVM: Remove old kvm_guest_debug structs

Remove the remaining arch fragments of the old guest debug interface
that now break non-x86 builds.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h    | 2 --
 arch/powerpc/include/asm/kvm_host.h | 6 ------
 arch/s390/include/asm/kvm_host.h    | 3 ---
 3 files changed, 11 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 7da0c096322..46f8b0eb568 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -239,8 +239,6 @@ struct kvm_vm_data {
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_guest_debug{
-};
 
 struct kvm_mmio_req {
 	uint64_t addr;          /*  physical address		*/
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe773..a22600537a8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -200,10 +200,4 @@ struct kvm_vcpu_arch {
 	unsigned long pending_exceptions;
 };
 
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
-
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3c55e4107dc..c6e674f5fca 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -21,9 +21,6 @@
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 
-struct kvm_guest_debug {
-};
-
 struct sca_entry {
 	atomic_t scn;
 	__u64	reserved;
-- 
cgit v1.2.3


From 22ccb14203d59a8bcf6f3fea76b3594d710569fa Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 18 Dec 2008 10:23:58 +0800
Subject: KVM: ia64: Code cleanup

Remove some unnecessary blank lines to accord with Kernel's coding style.
Also remove vcpu_get_itir_on_fault due to no reference to it.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/process.c | 15 ---------------
 arch/ia64/kvm/vcpu.c    | 39 ++-------------------------------------
 arch/ia64/kvm/vtlb.c    |  5 -----
 3 files changed, 2 insertions(+), 57 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 230eae482f3..f9c9504144f 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
 	return (rr1.val);
 }
 
-
 /*
  * Set vIFA & vITIR & vIHA, when vPSR.ic =1
  * Parameter:
@@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
 }
 
-
-
 /*
  * Data Nested TLB Fault
  *  @ Data Nested TLB Vector
@@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
 }
 
-
 /*
  * Data TLB Fault
  *  @ Data TLB vector
@@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	/* If vPSR.ic, IFA, ITIR, IHA*/
 	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
 	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-
-
 }
 
 /*
@@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	_vhpt_fault(vcpu, vadr);
 }
 
-
 /*
  * VHPT Data Fault
  *  @ VHPT Translation vector
@@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	_vhpt_fault(vcpu, vadr);
 }
 
-
-
 /*
  * Deal with:
  *  General Exception vector
@@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu)
 	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
 }
 
-
 /*
  * Illegal Operation Fault
  *  @ General Exception Vector
@@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
 }
 
-
 void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
 	__page_not_present(vcpu, vadr);
 }
 
-
 void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
 	__page_not_present(vcpu, vadr);
 }
 
-
 /* Deal with
  *  Data access rights vector
  */
@@ -703,7 +690,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu)
 	}
 }
 
-
 void leave_hypervisor_tail(void)
 {
 	struct kvm_vcpu *v = current_vcpu;
@@ -737,7 +723,6 @@ void leave_hypervisor_tail(void)
 	}
 }
 
-
 static inline void handle_lds(struct kvm_pt_regs *regs)
 {
 	regs->cr_ipsr |= IA64_PSR_ED;
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b5532..4d8be4c252f 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu)
 	return;
 }
 
-
 void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
 {
 	unsigned long psr;
@@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
 	return;
 }
 
-
-
 /*
  * In physical mode, insert tc/tr for region 0 and 4 uses
  * RID[0] and RID[4] which is for physical mode emulation.
@@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs,
 	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 }
 
-
 /*
  * The inverse of the above: given bspstore and the number of
  * registers, calculate ar.bsp.
@@ -1039,8 +1035,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
 	return key;
 }
 
-
-
 void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long thash, vadr;
@@ -1050,7 +1044,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
 }
 
-
 void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long tag, vadr;
@@ -1131,7 +1124,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
 	return IA64_NO_FAULT;
 }
 
-
 int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r1, r3;
@@ -1154,7 +1146,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
 }
 
-
 /************************************
  * Insert/Purge translation register/cache
  ************************************/
@@ -1385,7 +1376,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_itc(vcpu, r2);
 }
 
-
 void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r1;
@@ -1393,8 +1383,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 	r1 = vcpu_get_itc(vcpu);
 	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
 }
+
 /**************************************************************************
-  struct kvm_vcpu*protection key register access routines
+  struct kvm_vcpu protection key register access routines
  **************************************************************************/
 
 unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
@@ -1407,20 +1398,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
 	ia64_set_pkr(reg, val);
 }
 
-
-unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
-{
-	union ia64_rr rr, rr1;
-
-	rr.val = vcpu_get_rr(vcpu, ifa);
-	rr1.val = 0;
-	rr1.ps = rr.ps;
-	rr1.rid = rr.rid;
-	return (rr1.val);
-}
-
-
-
 /********************************
  * Moves to privileged registers
  ********************************/
@@ -1464,8 +1441,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
 	return (IA64_NO_FAULT);
 }
 
-
-
 void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r3, r2;
@@ -1510,8 +1485,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_pkr(vcpu, r3, r2);
 }
 
-
-
 void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r3, r1;
@@ -1557,7 +1530,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
 }
 
-
 unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
 {
 	/* FIXME: This could get called as a result of a rsvd-reg fault */
@@ -1609,7 +1581,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
 	return 0;
 }
 
-
 unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long tgt = inst.M33.r1;
@@ -1633,8 +1604,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
 	return 0;
 }
 
-
-
 void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 {
 
@@ -1776,9 +1745,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu)
 	}
 }
 
-
-
-
 void vcpu_rfi(struct kvm_vcpu *vcpu)
 {
 	unsigned long ifs, psr;
@@ -1796,7 +1762,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu)
 	regs->cr_iip = VCPU(vcpu, iip);
 }
 
-
 /*
    VPSR can't keep track of below bits of guest PSR
    This function gets guest PSR
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd5..ac94867f826 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -509,7 +509,6 @@ void thash_purge_all(struct kvm_vcpu *v)
 	local_flush_tlb_all();
 }
 
-
 /*
  * Lookup the hash table and its collision chain to find an entry
  * covering this address rid:va or the entry.
@@ -517,7 +516,6 @@ void thash_purge_all(struct kvm_vcpu *v)
  * INPUT:
  *  in: TLB format for both VHPT & TLB.
  */
-
 struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
 {
 	struct thash_data  *cch;
@@ -547,7 +545,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
 	return NULL;
 }
 
-
 /*
  * Initialize internal control data before service.
  */
@@ -589,7 +586,6 @@ u64 kvm_gpa_to_mpa(u64 gpa)
 	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
 }
 
-
 /*
  * Fetch guest bundle code.
  * INPUT:
@@ -631,7 +627,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
 	return IA64_NO_FAULT;
 }
 
-
 void kvm_init_vhpt(struct kvm_vcpu *v)
 {
 	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-- 
cgit v1.2.3


From a770f6f28b1a9287189f3dc8333eb694d9a2f0ab Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 21 Dec 2008 19:20:09 +0200
Subject: KVM: MMU: Inherit a shadow page's guest level count from vcpu setup

Instead of "calculating" it on every shadow page allocation, set it once
when switching modes, and copy it when allocating pages.

This doesn't buy us much, but sets up the stage for inheriting more
information related to the mmu setup.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0a4dab25a91..28f875f28f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -244,6 +244,7 @@ struct kvm_mmu {
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
+	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
 };
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c7147..f15023c11fe 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1204,8 +1204,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node, *tmp;
 
-	role.word = 0;
-	role.glevels = vcpu->arch.mmu.root_level;
+	role = vcpu->arch.mmu.base_role;
 	role.level = level;
 	role.metaphysical = metaphysical;
 	role.access = access;
@@ -2251,17 +2250,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
+	int r;
+
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 	if (!is_paging(vcpu))
-		return nonpaging_init_context(vcpu);
+		r = nonpaging_init_context(vcpu);
 	else if (is_long_mode(vcpu))
-		return paging64_init_context(vcpu);
+		r = paging64_init_context(vcpu);
 	else if (is_pae(vcpu))
-		return paging32E_init_context(vcpu);
+		r = paging32E_init_context(vcpu);
 	else
-		return paging32_init_context(vcpu);
+		r = paging32_init_context(vcpu);
+
+	vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+
+	return r;
 }
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 2f0b3d60b2c43aef7cd10169c425c052169c622a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 21 Dec 2008 19:27:36 +0200
Subject: KVM: MMU: Segregate mmu pages created with different cr4.pge settings

Don't allow a vcpu with cr4.pge cleared to use a shadow page created with
cr4.pge set; this might cause a cr3 switch not to sync ptes that have the
global bit set (the global bit has no effect if !cr4.pge).

This can only occur on smp with different cr4.pge settings for different
vcpus (since a cr4 change will resync the shadow ptes), but there's no
cost to being correct here.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/x86.c              | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28f875f28f5..c2a01d0513f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -183,6 +183,7 @@ union kvm_mmu_page_role {
 		unsigned metaphysical:1;
 		unsigned access:3;
 		unsigned invalid:1;
+		unsigned cr4_pge:1;
 	};
 };
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2477e87b2f8..873602b5edf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -364,6 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
+	vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
 	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
-- 
cgit v1.2.3


From e2078318042569682d0496cfd7bd962a766e82b1 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 21 Dec 2008 19:36:59 +0200
Subject: KVM: MMU: Initialize a shadow page's global attribute from cr4.pge

If cr4.pge is cleared, we ought to treat any ptes in the page as non-global.
This allows us to remove the check from set_spte().

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f15023c11fe..5f03ec324e3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -797,7 +797,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	ASSERT(is_empty_shadow_page(sp->spt));
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
-	sp->global = 1;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
@@ -1241,6 +1240,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
+	sp->global = role.cr4_pge;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
@@ -1668,8 +1668,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	u64 mt_mask = shadow_mt_mask;
 	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
 
-	if (!(vcpu->arch.cr4 & X86_CR4_PGE))
-		global = 0;
 	if (!global && sp->global) {
 		sp->global = 0;
 		if (sp->unsync) {
-- 
cgit v1.2.3


From 2b3d2a206037b1471de6a6dc51427af034cfdb47 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 23 Dec 2008 19:46:01 +0200
Subject: KVM: Fix vmload and friends misinterpreted as lidt

The AMD SVM instruction family all overload the 0f 01 /3 opcode, further
multiplexing on the three r/m bits.  But the code decided that anything that
isn't a vmmcall must be an lidt (which shares the 0f 01 /3 opcode, for the
case that mod = 3).

Fix by aborting emulation if this isn't a vmmcall.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a337..54fb09889a8 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1908,11 +1908,16 @@ twobyte_insn:
 			c->dst.type = OP_NONE;
 			break;
 		case 3: /* lidt/vmmcall */
-			if (c->modrm_mod == 3 && c->modrm_rm == 1) {
-				rc = kvm_fix_hypercall(ctxt->vcpu);
-				if (rc)
-					goto done;
-				kvm_emulate_hypercall(ctxt->vcpu);
+			if (c->modrm_mod == 3) {
+				switch (c->modrm_rm) {
+				case 1:
+					rc = kvm_fix_hypercall(ctxt->vcpu);
+					if (rc)
+						goto done;
+					break;
+				default:
+					goto cannot_emulate;
+				}
 			} else {
 				rc = read_descriptor(ctxt, ops, c->src.ptr,
 						     &size, &address,
-- 
cgit v1.2.3


From 2d11123a77e54d5cea262c958e8498f4a08bce3d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 25 Dec 2008 14:39:47 +0200
Subject: KVM: MMU: Add for_each_shadow_entry(), a simpler alternative to
 walk_shadow()

Using a for_each loop style removes the need to write callback and nasty
casts.

Implement the walk_shadow() using the for_each_shadow_entry().

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5f03ec324e3..c669f2af1d1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -150,6 +150,20 @@ struct kvm_shadow_walk {
 		     u64 addr, u64 *spte, int level);
 };
 
+struct kvm_shadow_walk_iterator {
+	u64 addr;
+	hpa_t shadow_addr;
+	int level;
+	u64 *sptep;
+	unsigned index;
+};
+
+#define for_each_shadow_entry(_vcpu, _addr, _walker)    \
+	for (shadow_walk_init(&(_walker), _vcpu, _addr);	\
+	     shadow_walk_okay(&(_walker));			\
+	     shadow_walk_next(&(_walker)))
+
+
 struct kvm_unsync_walk {
 	int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
 };
@@ -1254,33 +1268,48 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	return sp;
 }
 
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+			     struct kvm_vcpu *vcpu, u64 addr)
+{
+	iterator->addr = addr;
+	iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+	iterator->level = vcpu->arch.mmu.shadow_root_level;
+	if (iterator->level == PT32E_ROOT_LEVEL) {
+		iterator->shadow_addr
+			= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+		iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+		--iterator->level;
+		if (!iterator->shadow_addr)
+			iterator->level = 0;
+	}
+}
+
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+	if (iterator->level < PT_PAGE_TABLE_LEVEL)
+		return false;
+	iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+	iterator->sptep	= ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+	return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+	iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+	--iterator->level;
+}
+
 static int walk_shadow(struct kvm_shadow_walk *walker,
 		       struct kvm_vcpu *vcpu, u64 addr)
 {
-	hpa_t shadow_addr;
-	int level;
+	struct kvm_shadow_walk_iterator iterator;
 	int r;
-	u64 *sptep;
-	unsigned index;
-
-	shadow_addr = vcpu->arch.mmu.root_hpa;
-	level = vcpu->arch.mmu.shadow_root_level;
-	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
-		shadow_addr &= PT64_BASE_ADDR_MASK;
-		if (!shadow_addr)
-			return 1;
-		--level;
-	}
 
-	while (level >= PT_PAGE_TABLE_LEVEL) {
-		index = SHADOW_PT_INDEX(addr, level);
-		sptep = ((u64 *)__va(shadow_addr)) + index;
-		r = walker->entry(walker, vcpu, addr, sptep, level);
+	for_each_shadow_entry(vcpu, addr, iterator) {
+		r = walker->entry(walker, vcpu, addr,
+				  iterator.sptep, iterator.level);
 		if (r)
 			return r;
-		shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
-		--level;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 9f652d21c3f887075a33abae85bf53fec64e67b1 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 25 Dec 2008 14:54:25 +0200
Subject: KVM: MMU: Use for_each_shadow_entry() in __direct_map()

Eliminating a callback and a useless structure.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 83 +++++++++++++++++++-----------------------------------
 1 file changed, 29 insertions(+), 54 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c669f2af1d1..a25e1adb5cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1846,67 +1846,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-struct direct_shadow_walk {
-	struct kvm_shadow_walk walker;
-	pfn_t pfn;
-	int write;
-	int largepage;
-	int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
-			    struct kvm_vcpu *vcpu,
-			    u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+			int largepage, gfn_t gfn, pfn_t pfn)
 {
-	struct direct_shadow_walk *walk =
-		container_of(_walk, struct direct_shadow_walk, walker);
+	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
+	int pt_write = 0;
 	gfn_t pseudo_gfn;
-	gfn_t gfn = addr >> PAGE_SHIFT;
-
-	if (level == PT_PAGE_TABLE_LEVEL
-	    || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
-		mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
-			     0, walk->write, 1, &walk->pt_write,
-			     walk->largepage, 0, gfn, walk->pfn, false);
-		++vcpu->stat.pf_fixed;
-		return 1;
-	}
 
-	if (*sptep == shadow_trap_nonpresent_pte) {
-		pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
-		sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
-				      1, ACC_ALL, sptep);
-		if (!sp) {
-			pgprintk("nonpaging_map: ENOMEM\n");
-			kvm_release_pfn_clean(walk->pfn);
-			return -ENOMEM;
+	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+		if (iterator.level == PT_PAGE_TABLE_LEVEL
+		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+				     0, write, 1, &pt_write,
+				     largepage, 0, gfn, pfn, false);
+			++vcpu->stat.pf_fixed;
+			break;
 		}
 
-		set_shadow_pte(sptep,
-			       __pa(sp->spt)
-			       | PT_PRESENT_MASK | PT_WRITABLE_MASK
-			       | shadow_user_mask | shadow_x_mask);
-	}
-	return 0;
-}
+		if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+			pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+					      iterator.level - 1,
+					      1, ACC_ALL, iterator.sptep);
+			if (!sp) {
+				pgprintk("nonpaging_map: ENOMEM\n");
+				kvm_release_pfn_clean(pfn);
+				return -ENOMEM;
+			}
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int largepage, gfn_t gfn, pfn_t pfn)
-{
-	int r;
-	struct direct_shadow_walk walker = {
-		.walker = { .entry = direct_map_entry, },
-		.pfn = pfn,
-		.largepage = largepage,
-		.write = write,
-		.pt_write = 0,
-	};
-
-	r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
-	if (r < 0)
-		return r;
-	return walker.pt_write;
+			set_shadow_pte(iterator.sptep,
+				       __pa(sp->spt)
+				       | PT_PRESENT_MASK | PT_WRITABLE_MASK
+				       | shadow_user_mask | shadow_x_mask);
+		}
+	}
+	return pt_write;
 }
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
-- 
cgit v1.2.3


From e7a04c99b54ad9acb98a56113ec3163bc1039e13 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 25 Dec 2008 15:10:50 +0200
Subject: KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in fetch()

Effectively reverting to the pre walk_shadow() version -- but now
with the reusable for_each().

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 128 ++++++++++++++++++++-------------------------
 1 file changed, 58 insertions(+), 70 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17a..69c7e3311b8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,91 +283,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
-				    struct kvm_vcpu *vcpu, u64 addr,
-				    u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+			 struct guest_walker *gw,
+			 int user_fault, int write_fault, int largepage,
+			 int *ptwrite, pfn_t pfn)
 {
-	struct shadow_walker *sw =
-		container_of(_sw, struct shadow_walker, walker);
-	struct guest_walker *gw = sw->guest_walker;
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *shadow_page;
-	u64 spte;
+	u64 spte, *sptep;
 	int metaphysical;
 	gfn_t table_gfn;
 	int r;
+	int level;
 	pt_element_t curr_pte;
+	struct kvm_shadow_walk_iterator iterator;
 
-	if (level == PT_PAGE_TABLE_LEVEL
-	    || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
-		mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
-			     sw->user_fault, sw->write_fault,
-			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-			     sw->ptwrite, sw->largepage,
-			     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
-			     gw->gfn, sw->pfn, false);
-		sw->sptep = sptep;
-		return 1;
-	}
-
-	if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
-		return 0;
-
-	if (is_large_pte(*sptep)) {
-		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-		kvm_flush_remote_tlbs(vcpu->kvm);
-		rmap_remove(vcpu->kvm, sptep);
-	}
+	if (!is_present_pte(gw->ptes[gw->level - 1]))
+		return NULL;
 
-	if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
-		metaphysical = 1;
-		if (!is_dirty_pte(gw->ptes[level - 1]))
-			access &= ~ACC_WRITE_MASK;
-		table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
-	} else {
-		metaphysical = 0;
-		table_gfn = gw->table_gfn[level - 2];
-	}
-	shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
-				       metaphysical, access, sptep);
-	if (!metaphysical) {
-		r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
-					  &curr_pte, sizeof(curr_pte));
-		if (r || curr_pte != gw->ptes[level - 2]) {
-			kvm_mmu_put_page(shadow_page, sptep);
-			kvm_release_pfn_clean(sw->pfn);
-			sw->sptep = NULL;
-			return 1;
+	for_each_shadow_entry(vcpu, addr, iterator) {
+		level = iterator.level;
+		sptep = iterator.sptep;
+		if (level == PT_PAGE_TABLE_LEVEL
+		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
+			mmu_set_spte(vcpu, sptep, access,
+				     gw->pte_access & access,
+				     user_fault, write_fault,
+				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+				     ptwrite, largepage,
+				     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+				     gw->gfn, pfn, false);
+			break;
 		}
-	}
 
-	spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
-		| PT_WRITABLE_MASK | PT_USER_MASK;
-	*sptep = spte;
-	return 0;
-}
+		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+			continue;
 
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-			 struct guest_walker *guest_walker,
-			 int user_fault, int write_fault, int largepage,
-			 int *ptwrite, pfn_t pfn)
-{
-	struct shadow_walker walker = {
-		.walker = { .entry = FNAME(shadow_walk_entry), },
-		.guest_walker = guest_walker,
-		.user_fault = user_fault,
-		.write_fault = write_fault,
-		.largepage = largepage,
-		.ptwrite = ptwrite,
-		.pfn = pfn,
-	};
+		if (is_large_pte(*sptep)) {
+			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			kvm_flush_remote_tlbs(vcpu->kvm);
+			rmap_remove(vcpu->kvm, sptep);
+		}
 
-	if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
-		return NULL;
+		if (level == PT_DIRECTORY_LEVEL
+		    && gw->level == PT_DIRECTORY_LEVEL) {
+			metaphysical = 1;
+			if (!is_dirty_pte(gw->ptes[level - 1]))
+				access &= ~ACC_WRITE_MASK;
+			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+		} else {
+			metaphysical = 0;
+			table_gfn = gw->table_gfn[level - 2];
+		}
+		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+					       metaphysical, access, sptep);
+		if (!metaphysical) {
+			r = kvm_read_guest_atomic(vcpu->kvm,
+						  gw->pte_gpa[level - 2],
+						  &curr_pte, sizeof(curr_pte));
+			if (r || curr_pte != gw->ptes[level - 2]) {
+				kvm_mmu_put_page(shadow_page, sptep);
+				kvm_release_pfn_clean(pfn);
+				sptep = NULL;
+				break;
+			}
+		}
 
-	walk_shadow(&walker.walker, vcpu, addr);
+		spte = __pa(shadow_page->spt)
+			| PT_PRESENT_MASK | PT_ACCESSED_MASK
+			| PT_WRITABLE_MASK | PT_USER_MASK;
+		*sptep = spte;
+	}
 
-	return walker.sptep;
+	return sptep;
 }
 
 /*
-- 
cgit v1.2.3


From a461930bc3cece021f8f89a80dcc1d0691a92b52 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 25 Dec 2008 15:19:00 +0200
Subject: KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in
 invlpg()

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 81 ++++++++++++++++++----------------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 69c7e3311b8..46b68f941f6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
 #if PTTYPE == 64
 	#define pt_element_t u64
 	#define guest_walker guest_walker64
-	#define shadow_walker shadow_walker64
 	#define FNAME(name) paging##64_##name
 	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
 	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
 #elif PTTYPE == 32
 	#define pt_element_t u32
 	#define guest_walker guest_walker32
-	#define shadow_walker shadow_walker32
 	#define FNAME(name) paging##32_##name
 	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
 	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
 	u32 error_code;
 };
 
-struct shadow_walker {
-	struct kvm_shadow_walk walker;
-	struct guest_walker *guest_walker;
-	int user_fault;
-	int write_fault;
-	int largepage;
-	int *ptwrite;
-	pfn_t pfn;
-	u64 *sptep;
-	gpa_t pte_gpa;
-};
-
 static gfn_t gpte_to_gfn(pt_element_t gpte)
 {
 	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -453,54 +439,52 @@ out_unlock:
 	return 0;
 }
 
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
-				      struct kvm_vcpu *vcpu, u64 addr,
-				      u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	struct shadow_walker *sw =
-		container_of(_sw, struct shadow_walker, walker);
+	struct kvm_shadow_walk_iterator iterator;
+	pt_element_t gpte;
+	gpa_t pte_gpa = -1;
+	int level;
+	u64 *sptep;
+
+	spin_lock(&vcpu->kvm->mmu_lock);
 
-	/* FIXME: properly handle invlpg on large guest pages */
-	if (level == PT_PAGE_TABLE_LEVEL ||
-	    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
-		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+	for_each_shadow_entry(vcpu, gva, iterator) {
+		level = iterator.level;
+		sptep = iterator.sptep;
 
-		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
-		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+		/* FIXME: properly handle invlpg on large guest pages */
+		if (level == PT_PAGE_TABLE_LEVEL ||
+		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
-		if (is_shadow_present_pte(*sptep)) {
-			rmap_remove(vcpu->kvm, sptep);
-			if (is_large_pte(*sptep))
-				--vcpu->kvm->stat.lpages;
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+			if (is_shadow_present_pte(*sptep)) {
+				rmap_remove(vcpu->kvm, sptep);
+				if (is_large_pte(*sptep))
+					--vcpu->kvm->stat.lpages;
+			}
+			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+			break;
 		}
-		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-		return 1;
-	}
-	if (!is_shadow_present_pte(*sptep))
-		return 1;
-	return 0;
-}
 
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
-	pt_element_t gpte;
-	struct shadow_walker walker = {
-		.walker = { .entry = FNAME(shadow_invlpg_entry), },
-		.pte_gpa = -1,
-	};
+		if (!is_shadow_present_pte(*sptep))
+			break;
+	}
 
-	spin_lock(&vcpu->kvm->mmu_lock);
-	walk_shadow(&walker.walker, vcpu, gva);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (walker.pte_gpa == -1)
+
+	if (pte_gpa == -1)
 		return;
-	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
 				  sizeof(pt_element_t)))
 		return;
 	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
 		if (mmu_topup_memory_caches(vcpu))
 			return;
-		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
 				  sizeof(pt_element_t), 0);
 	}
 }
@@ -607,7 +591,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 #undef pt_element_t
 #undef guest_walker
-#undef shadow_walker
 #undef FNAME
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
-- 
cgit v1.2.3


From e8c4a4e8a7cc047dfb3c26b2cbc8599ad3460364 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 25 Dec 2008 15:20:07 +0200
Subject: KVM: MMU: Drop walk_shadow()

No longer used.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a25e1adb5cf..15850809b55 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,6 @@ struct kvm_rmap_desc {
 	struct kvm_rmap_desc *more;
 };
 
-struct kvm_shadow_walk {
-	int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
-		     u64 addr, u64 *spte, int level);
-};
-
 struct kvm_shadow_walk_iterator {
 	u64 addr;
 	hpa_t shadow_addr;
@@ -1299,21 +1294,6 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	--iterator->level;
 }
 
-static int walk_shadow(struct kvm_shadow_walk *walker,
-		       struct kvm_vcpu *vcpu, u64 addr)
-{
-	struct kvm_shadow_walk_iterator iterator;
-	int r;
-
-	for_each_shadow_entry(vcpu, addr, iterator) {
-		r = walker->entry(walker, vcpu, addr,
-				  iterator.sptep, iterator.level);
-		if (r)
-			return r;
-	}
-	return 0;
-}
-
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 					 struct kvm_mmu_page *sp)
 {
-- 
cgit v1.2.3


From 53f658b3c33616a4997ee254311b335e59063289 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 11 Dec 2008 20:45:05 +0100
Subject: KVM: VMX: initialize TSC offset relative to vm creation time

VMX initializes the TSC offset for each vcpu at different times, and
also reinitializes it for vcpus other than 0 on APIC SIPI message.

This bug causes the TSC's to appear unsynchronized in the guest, even if
the host is good.

Older Linux kernels don't handle the situation very well, so
gettimeofday is likely to go backwards in time:

http://www.mail-archive.com/kvm@vger.kernel.org/msg02955.html
http://sourceforge.net/tracker/index.php?func=detail&aid=2025534&group_id=180599&atid=893831

Fix it by initializating the offset of each vcpu relative to vm creation
time, and moving it from vmx_vcpu_reset to vmx_vcpu_setup, out of the
APIC MP init path.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              | 19 +++++++++++--------
 arch/x86/kvm/x86.c              |  2 ++
 3 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c2a01d0513f..9efc446b5ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -398,6 +398,7 @@ struct kvm_arch{
 
 	unsigned long irq_sources_bitmap;
 	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+	u64 vm_init_tsc;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cee81c9a665..3312047664a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -865,11 +865,8 @@ static u64 guest_read_tsc(void)
  * writes 'guest_tsc' into guest's timestamp counter "register"
  * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
  */
-static void guest_write_tsc(u64 guest_tsc)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 {
-	u64 host_tsc;
-
-	rdtscll(host_tsc);
 	vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
 }
 
@@ -934,6 +931,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_msr_entry *msr;
+	u64 host_tsc;
 	int ret = 0;
 
 	switch (msr_index) {
@@ -959,7 +957,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
 	case MSR_IA32_TIME_STAMP_COUNTER:
-		guest_write_tsc(data);
+		rdtscll(host_tsc);
+		guest_write_tsc(data, host_tsc);
 		break;
 	case MSR_P6_PERFCTR0:
 	case MSR_P6_PERFCTR1:
@@ -2109,7 +2108,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
 	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
-	u64 host_pat;
+	u64 host_pat, tsc_this, tsc_base;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2237,6 +2236,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
+	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+	rdtscll(tsc_this);
+	if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+		tsc_base = tsc_this;
+
+	guest_write_tsc(0, tsc_base);
 
 	return 0;
 }
@@ -2328,8 +2333,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
-	guest_write_tsc(0);
-
 	/* Special registers */
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 873602b5edf..3b2acfd72d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4170,6 +4170,8 @@ struct  kvm *kvm_arch_create_vm(void)
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
+	rdtscll(kvm->arch.vm_init_tsc);
+
 	return kvm;
 }
 
-- 
cgit v1.2.3


From 77c2002e7c6f019f59a6f3cc5f8b16b41748dbe1 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 29 Dec 2008 01:42:19 +0200
Subject: KVM: introduce kvm_read_guest_virt, kvm_write_guest_virt

This commit change the name of emulator_read_std into kvm_read_guest_virt,
and add new function name kvm_write_guest_virt that allow writing into a
guest virtual address.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  4 ---
 arch/x86/kvm/x86.c              | 56 ++++++++++++++++++++++++++++++-----------
 2 files changed, 42 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9efc446b5ac..b74576aec19 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -609,10 +609,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
 void fx_init(struct kvm_vcpu *vcpu);
 
-int emulator_read_std(unsigned long addr,
-		      void *val,
-		      unsigned int bytes,
-		      struct kvm_vcpu *vcpu);
 int emulator_write_emulated(unsigned long addr,
 			    const void *val,
 			    unsigned int bytes,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b2acfd72d7..67f91764e99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1976,10 +1976,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 	return dev;
 }
 
-int emulator_read_std(unsigned long addr,
-			     void *val,
-			     unsigned int bytes,
-			     struct kvm_vcpu *vcpu)
+int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			struct kvm_vcpu *vcpu)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
@@ -1987,27 +1985,57 @@ int emulator_read_std(unsigned long addr,
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
-		unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
 
 		if (gpa == UNMAPPED_GVA) {
 			r = X86EMUL_PROPAGATE_FAULT;
 			goto out;
 		}
-		ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
 		if (ret < 0) {
 			r = X86EMUL_UNHANDLEABLE;
 			goto out;
 		}
 
-		bytes -= tocopy;
-		data += tocopy;
-		addr += tocopy;
+		bytes -= toread;
+		data += toread;
+		addr += toread;
 	}
 out:
 	return r;
 }
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
+int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			 struct kvm_vcpu *vcpu)
+{
+	void *data = val;
+	int r = X86EMUL_CONTINUE;
+
+	while (bytes) {
+		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+		unsigned offset = addr & (PAGE_SIZE-1);
+		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
+		int ret;
+
+		if (gpa == UNMAPPED_GVA) {
+			r = X86EMUL_PROPAGATE_FAULT;
+			goto out;
+		}
+		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
+		if (ret < 0) {
+			r = X86EMUL_UNHANDLEABLE;
+			goto out;
+		}
+
+		bytes -= towrite;
+		data += towrite;
+		addr += towrite;
+	}
+out:
+	return r;
+}
+
 
 static int emulator_read_emulated(unsigned long addr,
 				  void *val,
@@ -2029,8 +2057,8 @@ static int emulator_read_emulated(unsigned long addr,
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 		goto mmio;
 
-	if (emulator_read_std(addr, val, bytes, vcpu)
-			== X86EMUL_CONTINUE)
+	if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+				== X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -2233,7 +2261,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
 	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
-	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
 
 	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2241,7 +2269,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 static struct x86_emulate_ops emulate_ops = {
-	.read_std            = emulator_read_std,
+	.read_std            = kvm_read_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
-- 
cgit v1.2.3


From 0f346074403bc109f9569f14b45cb09e83729032 Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Mon, 29 Dec 2008 01:42:20 +0200
Subject: KVM: remove the vmap usage

vmap() on guest pages hides those pages from the Linux mm for an extended
(userspace determined) amount of time.  Get rid of it.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 62 ++++++++++++------------------------------------------
 1 file changed, 13 insertions(+), 49 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 67f91764e99..2a4f3a69734 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2371,40 +2371,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
-		if (vcpu->arch.pio.guest_pages[i]) {
-			kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
-			vcpu->arch.pio.guest_pages[i] = NULL;
-		}
-}
-
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
 	void *p = vcpu->arch.pio_data;
-	void *q;
+	gva_t q = vcpu->arch.pio.guest_gva;
 	unsigned bytes;
-	int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+	int ret;
 
-	q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
-		 PAGE_KERNEL);
-	if (!q) {
-		free_pio_guest_pages(vcpu);
-		return -ENOMEM;
-	}
-	q += vcpu->arch.pio.guest_page_offset;
 	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
 	if (vcpu->arch.pio.in)
-		memcpy(q, p, bytes);
+		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
 	else
-		memcpy(p, q, bytes);
-	q -= vcpu->arch.pio.guest_page_offset;
-	vunmap(q);
-	free_pio_guest_pages(vcpu);
-	return 0;
+		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+	return ret;
 }
 
 int complete_pio(struct kvm_vcpu *vcpu)
@@ -2515,7 +2494,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 0;
 	vcpu->arch.pio.down = 0;
-	vcpu->arch.pio.guest_page_offset = 0;
 	vcpu->arch.pio.rep = 0;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2543,9 +2521,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		  gva_t address, int rep, unsigned port)
 {
 	unsigned now, in_page;
-	int i, ret = 0;
-	int nr_pages = 1;
-	struct page *page;
+	int ret = 0;
 	struct kvm_io_device *pio_dev;
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2557,7 +2533,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 1;
 	vcpu->arch.pio.down = down;
-	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
 	vcpu->arch.pio.rep = rep;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2577,15 +2552,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	else
 		in_page = offset_in_page(address) + size;
 	now = min(count, (unsigned long)in_page / size);
-	if (!now) {
-		/*
-		 * String I/O straddles page boundary.  Pin two guest pages
-		 * so that we satisfy atomicity constraints.  Do just one
-		 * transaction to avoid complexity.
-		 */
-		nr_pages = 2;
+	if (!now)
 		now = 1;
-	}
 	if (down) {
 		/*
 		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
@@ -2600,15 +2568,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-	for (i = 0; i < nr_pages; ++i) {
-		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-		vcpu->arch.pio.guest_pages[i] = page;
-		if (!page) {
-			kvm_inject_gp(vcpu, 0);
-			free_pio_guest_pages(vcpu);
-			return 1;
-		}
-	}
+	vcpu->arch.pio.guest_gva = address;
 
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
@@ -2616,7 +2576,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
-		if (ret >= 0 && pio_dev) {
+		if (ret == X86EMUL_PROPAGATE_FAULT) {
+			kvm_inject_gp(vcpu, 0);
+			return 1;
+		}
+		if (ret == 0 && pio_dev) {
 			pio_string_write(pio_dev, vcpu);
 			complete_pio(vcpu);
 			if (vcpu->arch.pio.count == 0)
-- 
cgit v1.2.3


From 61a6bd672bda3b9468bf5895c1be085c4e481138 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 29 Dec 2008 17:32:28 +0200
Subject: KVM: Fallback support for MSR_VM_HSAVE_PA

Since we advertise MSR_VM_HSAVE_PA, userspace will attempt to read it
even on Intel.  Implement fake support for this MSR to avoid the
warnings.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a4f3a69734..c3fbe8c55c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -742,6 +742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
+	case MSR_VM_HSAVE_PA:
 		break;
 	case 0x200 ... 0x2ff:
 		return set_msr_mtrr(vcpu, msr, data);
@@ -863,6 +864,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
+	case MSR_VM_HSAVE_PA:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
-- 
cgit v1.2.3


From 52d939a0bf44081bc9f69b4fbdc9e7f416df27c7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 30 Dec 2008 15:55:06 -0200
Subject: KVM: PIT: provide an option to disable interrupt reinjection

Certain clocks (such as TSC) in older 2.6 guests overaccount for lost
ticks, causing severe time drift. Interrupt reinjection magnifies the
problem.

Provide an option to disable it.

[avi: allow room for expansion in case we want to disable reinjection
      of other timers]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm.h |  5 +++++
 arch/x86/kvm/i8254.c       |  4 ++++
 arch/x86/kvm/i8254.h       |  1 +
 arch/x86/kvm/x86.c         | 21 +++++++++++++++++++++
 4 files changed, 31 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 32eb96c7ca2..54bcf228152 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -233,4 +233,9 @@ struct kvm_guest_debug_arch {
 struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
+
+struct kvm_reinject_control {
+	__u8 pit_reinject;
+	__u8 reserved[31];
+};
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5..528daadeba4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 	if (!atomic_inc_and_test(&pt->pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
 
+	if (!pt->reinject)
+		atomic_set(&pt->pending, 1);
+
 	if (vcpu0 && waitqueue_active(&vcpu0->wq))
 		wake_up_interruptible(&vcpu0->wq);
 
@@ -580,6 +583,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+	pit_state->pit_timer.reinject = true;
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_pit_reset(pit);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97a..76959c4b500 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
 	s64 period; /* unit: ns */
 	s64 scheduled;
 	atomic_t pending;
+	bool reinject;
 };
 
 struct kvm_kpit_channel_state {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c3fbe8c55c1..a1f14611f4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -993,6 +993,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_REINJECT_CONTROL:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1728,6 +1729,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 	return r;
 }
 
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+				 struct kvm_reinject_control *control)
+{
+	if (!kvm->arch.vpit)
+		return -ENXIO;
+	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	return 0;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -1925,6 +1935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_REINJECT_CONTROL: {
+		struct kvm_reinject_control control;
+		r =  -EFAULT;
+		if (copy_from_user(&control, argp, sizeof(control)))
+			goto out;
+		r = kvm_vm_ioctl_reinject(kvm, &control);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
-- 
cgit v1.2.3


From 1c08364c3565242f1e1bd585bc2ce458967941af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 12:39:07 +0200
Subject: KVM: Move struct kvm_pio_request into x86 kvm_host.h

This is an x86 specific stucture and has no business living in common code.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b74576aec19..863ea73431a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -227,6 +227,18 @@ struct kvm_pv_mmu_op_buffer {
 	char buf[512] __aligned(sizeof(long));
 };
 
+struct kvm_pio_request {
+	unsigned long count;
+	int cur_count;
+	gva_t guest_gva;
+	int in;
+	int port;
+	int size;
+	int string;
+	int down;
+	int rep;
+};
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
-- 
cgit v1.2.3


From c46fb0211f8b93de45400bff814c0f29335af5fc Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:22:58 -0600
Subject: KVM: ppc: move struct kvmppc_44x_tlbe into 44x-specific header

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_44x.h  | 7 +++++++
 arch/powerpc/include/asm/kvm_host.h | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index f49031b632c..d22d39942a9 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -28,6 +28,13 @@
  * need to find some way of advertising it. */
 #define KVM44x_GUEST_TLB_SIZE 64
 
+struct kvmppc_44x_tlbe {
+	u32 tid; /* Only the low 8 bits are used. */
+	u32 word0;
+	u32 word1;
+	u32 word2;
+};
+
 struct kvmppc_44x_shadow_ref {
 	struct page *page;
 	u16 gtlb_index;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a22600537a8..65c4d492d72 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,13 +64,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvmppc_44x_tlbe {
-	u32 tid; /* Only the low 8 bits are used. */
-	u32 word0;
-	u32 word1;
-	u32 word2;
-};
-
 enum kvm_exit_types {
 	MMIO_EXITS,
 	DCR_EXITS,
-- 
cgit v1.2.3


From ecc0981ff07cbe7cdf95de20be5b24fee8e49cb5 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:22:59 -0600
Subject: KVM: ppc: cosmetic changes to mmu hook names

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_ppc.h | 5 +++--
 arch/powerpc/kvm/44x_tlb.c         | 2 +-
 arch/powerpc/kvm/powerpc.c         | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 36d2a50a848..7ba95d28b83 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,13 +52,14 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
+/* Core-specific hooks */
+
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
                            u64 asid, u32 flags, u32 max_bytes,
                            unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-
-/* Core-specific hooks */
+extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9a34b8edb9e..8f9c09cbb83 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -248,7 +248,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
 	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
 }
 
-void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7c2ad4017d6..f30be9ef231 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -216,7 +216,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-	kvmppc_core_destroy_mmu(vcpu);
+	kvmppc_mmu_destroy(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-- 
cgit v1.2.3


From 475e7cdd69101939006659a63c2e4a32d5b71389 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:00 -0600
Subject: KVM: ppc: small cosmetic changes to Book E DTLB miss handler

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/booke.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 35485dd6927..d196ae61930 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -290,6 +290,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
 		int gtlb_index;
+		gpa_t gpaddr;
 		gfn_t gfn;
 
 		/* Check the guest TLB. */
@@ -305,8 +306,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 
 		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
-		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 			/* The guest TLB had a mapping, but the shadow TLB
@@ -315,13 +316,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
 			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
+			vcpu->arch.paddr_accessed = gpaddr;
 			r = kvmppc_emulate_mmio(run, vcpu);
 			kvmppc_account_exit(vcpu, MMIO_EXITS);
 		}
-- 
cgit v1.2.3


From 58a96214a306fc7fc66105097eea9c4f3bfa35bc Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:01 -0600
Subject: KVM: ppc: change kvmppc_mmu_map() parameters

Passing just the TLB index will ease an e500 implementation.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_ppc.h |  1 -
 arch/powerpc/kvm/44x_tlb.c         | 15 +++++++--------
 arch/powerpc/kvm/booke.c           |  6 ++----
 3 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7ba95d28b83..f661f8ba3ab 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -55,7 +55,6 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 /* Core-specific hooks */
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                           u64 asid, u32 flags, u32 max_bytes,
                            unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 8f9c09cbb83..e8ed22f28ea 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -269,15 +269,19 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
  * Caller must ensure that the specified guest TLB entry is safe to insert into
  * the shadow TLB.
  */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
-                    u32 flags, u32 max_bytes, unsigned int gtlb_index)
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                    unsigned int gtlb_index)
 {
 	struct kvmppc_44x_tlbe stlbe;
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 	struct kvmppc_44x_shadow_ref *ref;
 	struct page *new_page;
 	hpa_t hpaddr;
 	gfn_t gfn;
+	u32 asid = gtlbe->tid;
+	u32 flags = gtlbe->word2;
+	u32 max_bytes = get_tlb_bytes(gtlbe);
 	unsigned int victim;
 
 	/* Select TLB entry to clobber. Indirectly guard against races with the TLB
@@ -448,10 +452,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	}
 
 	if (tlbe_is_host_safe(vcpu, tlbe)) {
-		u64 asid;
 		gva_t eaddr;
 		gpa_t gpaddr;
-		u32 flags;
 		u32 bytes;
 
 		eaddr = get_tlb_eaddr(tlbe);
@@ -462,10 +464,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		eaddr &= ~(bytes - 1);
 		gpaddr &= ~(bytes - 1);
 
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		flags = tlbe->word2 & 0xffff;
-
-		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 	}
 
 	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d196ae61930..85b9e2fc6c6 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -316,8 +316,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
@@ -364,8 +363,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * b) the guest used a large mapping which we're faking
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
-- 
cgit v1.2.3


From be8d1cae07d5acf4a61046d7def5eda40f0981e1 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:02 -0600
Subject: KVM: ppc: turn tlb_xlate() into a per-core hook (and give it a better
 name)

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_ppc.h |  2 ++
 arch/powerpc/kvm/44x.c             |  6 +-----
 arch/powerpc/kvm/44x_tlb.c         | 10 ++++++++++
 arch/powerpc/kvm/44x_tlb.h         |  7 -------
 arch/powerpc/kvm/booke.c           | 10 ++--------
 5 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f661f8ba3ab..9fd70aa916d 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -59,6 +59,8 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
+extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                              gva_t eaddr);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a66bec57265..8383603dd8a 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -149,8 +149,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
                                struct kvm_translation *tr)
 {
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *gtlbe;
 	int index;
 	gva_t eaddr;
 	u8 pid;
@@ -166,9 +164,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 		return 0;
 	}
 
-	gtlbe = &vcpu_44x->guest_tlb[index];
-
-	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
 	/* XXX what does "writeable" and "usermode" even mean? */
 	tr->valid = 1;
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index e8ed22f28ea..2f14671e8a1 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -208,6 +208,16 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
 	return -1;
 }
 
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                       gva_t eaddr)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+	unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
+
+	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
 int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 772191f29e6..05b6f7eef5b 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -85,11 +85,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
 	return (vcpu->arch.mmucr >> 16) & 0x1;
 }
 
-static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
-{
-	unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
-
-	return get_tlb_raddr(tlbe) | (eaddr & pgmask);
-}
-
 #endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 85b9e2fc6c6..56d6ed69ed6 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -286,8 +286,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_DTLB_MISS: {
-		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
 		int gtlb_index;
 		gpa_t gpaddr;
@@ -305,8 +303,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		}
 
-		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
 		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
@@ -332,8 +329,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_ITLB_MISS: {
-		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
 		gpa_t gpaddr;
 		gfn_t gfn;
@@ -352,8 +347,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
 
-		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
 		gfn = gpaddr >> PAGE_SHIFT;
 
 		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
-- 
cgit v1.2.3


From fa86b8dda2e0faccefbeda61edc02a50bd588f4f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:03 -0600
Subject: KVM: ppc: rename 44x MMU functions used in booke.c

e500 will provide its own implementation of these.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_ppc.h | 2 ++
 arch/powerpc/kvm/44x_tlb.c         | 4 ++--
 arch/powerpc/kvm/44x_tlb.h         | 2 --
 arch/powerpc/kvm/booke.c           | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9fd70aa916d..5e80a20f32b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -59,6 +59,8 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
                               gva_t eaddr);
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 2f14671e8a1..e67b7313ffc 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -218,14 +218,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
 	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
 
-int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
 
 	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
 
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 05b6f7eef5b..a9ff80e5152 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,8 +25,6 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 
 extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
                                  u8 rc);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 56d6ed69ed6..1e692ac16e9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -292,7 +292,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		gfn_t gfn;
 
 		/* Check the guest TLB. */
-		gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+		gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
@@ -337,7 +337,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 
 		/* Check the guest TLB. */
-		gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+		gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
-- 
cgit v1.2.3


From f44353610b584fcbc31e363f35594796c6446d63 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:04 -0600
Subject: KVM: ppc: remove last 44x-specific bits from booke.c

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/booke.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1e692ac16e9..a73b3959e41 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -30,10 +30,8 @@
 #include <asm/kvm_ppc.h>
 #include "timing.h"
 #include <asm/cacheflush.h>
-#include <asm/kvm_44x.h>
 
 #include "booke.h"
-#include "44x_tlb.h"
 
 unsigned long kvmppc_booke_handlers;
 
@@ -284,7 +282,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 
-	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_DTLB_MISS: {
 		unsigned long eaddr = vcpu->arch.fault_dear;
 		int gtlb_index;
@@ -327,7 +324,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
-	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_ITLB_MISS: {
 		unsigned long eaddr = vcpu->arch.pc;
 		gpa_t gpaddr;
-- 
cgit v1.2.3


From cea5d8c9de669e30ed6d60930318376d5cc42e9e Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:05 -0600
Subject: KVM: ppc: use macros instead of hardcoded literals for instruction
 decoding

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/emulate.c | 93 +++++++++++++++++++++++++++++++---------------
 1 file changed, 63 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93f..a561d6e8da1 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -30,6 +30,39 @@
 #include <asm/disassemble.h>
 #include "timing.h"
 
+#define OP_TRAP 3
+
+#define OP_31_XOP_LWZX      23
+#define OP_31_XOP_LBZX      87
+#define OP_31_XOP_STWX      151
+#define OP_31_XOP_STBX      215
+#define OP_31_XOP_STBUX     247
+#define OP_31_XOP_LHZX      279
+#define OP_31_XOP_LHZUX     311
+#define OP_31_XOP_MFSPR     339
+#define OP_31_XOP_STHX      407
+#define OP_31_XOP_STHUX     439
+#define OP_31_XOP_MTSPR     467
+#define OP_31_XOP_DCBI      470
+#define OP_31_XOP_LWBRX     534
+#define OP_31_XOP_TLBSYNC   566
+#define OP_31_XOP_STWBRX    662
+#define OP_31_XOP_LHBRX     790
+#define OP_31_XOP_STHBRX    918
+
+#define OP_LWZ  32
+#define OP_LWZU 33
+#define OP_LBZ  34
+#define OP_LBZU 35
+#define OP_STW  36
+#define OP_STWU 37
+#define OP_STB  38
+#define OP_STBU 39
+#define OP_LHZ  40
+#define OP_LHZU 41
+#define OP_STH  44
+#define OP_STHU 45
+
 void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.tcr & TCR_DIE) {
@@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
 
 	switch (get_op(inst)) {
-	case 3:                                             /* trap */
+	case OP_TRAP:
 		vcpu->arch.esr |= ESR_PTR;
 		kvmppc_core_queue_program(vcpu);
 		advance = 0;
@@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	case 31:
 		switch (get_xop(inst)) {
 
-		case 23:                                        /* lwzx */
+		case OP_31_XOP_LWZX:
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 			break;
 
-		case 87:                                        /* lbzx */
+		case OP_31_XOP_LBZX:
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 			break;
 
-		case 151:                                       /* stwx */
+		case OP_31_XOP_STWX:
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
 			                               vcpu->arch.gpr[rs],
 			                               4, 1);
 			break;
 
-		case 215:                                       /* stbx */
+		case OP_31_XOP_STBX:
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
 			                               vcpu->arch.gpr[rs],
 			                               1, 1);
 			break;
 
-		case 247:                                       /* stbux */
+		case OP_31_XOP_STBUX:
 			rs = get_rs(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[rs] = ea;
 			break;
 
-		case 279:                                       /* lhzx */
+		case OP_31_XOP_LHZX:
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
 			break;
 
-		case 311:                                       /* lhzux */
+		case OP_31_XOP_LHZUX:
 			rt = get_rt(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 339:                                       /* mfspr */
+		case OP_31_XOP_MFSPR:
 			sprn = get_sprn(inst);
 			rt = get_rt(inst);
 
@@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			}
 			break;
 
-		case 407:                                       /* sthx */
+		case OP_31_XOP_STHX:
 			rs = get_rs(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               2, 1);
 			break;
 
-		case 439:                                       /* sthux */
+		case OP_31_XOP_STHUX:
 			rs = get_rs(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 467:                                       /* mtspr */
+		case OP_31_XOP_MTSPR:
 			sprn = get_sprn(inst);
 			rs = get_rs(inst);
 			switch (sprn) {
@@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			}
 			break;
 
-		case 470:                                       /* dcbi */
+		case OP_31_XOP_DCBI:
 			/* Do nothing. The guest is performing dcbi because
 			 * hardware DMA is not snooped by the dcache, but
 			 * emulated DMA either goes through the dcache as
@@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			 * coherence. */
 			break;
 
-		case 534:                                       /* lwbrx */
+		case OP_31_XOP_LWBRX:
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
 			break;
 
-		case 566:                                       /* tlbsync */
+		case OP_31_XOP_TLBSYNC:
 			break;
 
-		case 662:                                       /* stwbrx */
+		case OP_31_XOP_STWBRX:
 			rs = get_rs(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			                               4, 0);
 			break;
 
-		case 790:                                       /* lhbrx */
+		case OP_31_XOP_LHBRX:
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
 			break;
 
-		case 918:                                       /* sthbrx */
+		case OP_31_XOP_STHBRX:
 			rs = get_rs(inst);
 			ra = get_ra(inst);
 			rb = get_rb(inst);
@@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 		break;
 
-	case 32:                                                /* lwz */
+	case OP_LWZ:
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 		break;
 
-	case 33:                                                /* lwzu */
+	case OP_LWZU:
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
 		break;
 
-	case 34:                                                /* lbz */
+	case OP_LBZ:
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 		break;
 
-	case 35:                                                /* lbzu */
+	case OP_LBZU:
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
 		break;
 
-	case 36:                                                /* stw */
+	case OP_STW:
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
 		                               4, 1);
 		break;
 
-	case 37:                                                /* stwu */
+	case OP_STWU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
 		break;
 
-	case 38:                                                /* stb */
+	case OP_STB:
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
 		                               1, 1);
 		break;
 
-	case 39:                                                /* stbu */
+	case OP_STBU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
 		break;
 
-	case 40:                                                /* lhz */
+	case OP_LHZ:
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
 		break;
 
-	case 41:                                                /* lhzu */
+	case OP_LHZU:
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
 		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
 		break;
 
-	case 44:                                                /* sth */
+	case OP_STH:
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
 		                               2, 1);
 		break;
 
-	case 45:                                                /* sthu */
+	case OP_STHU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
-- 
cgit v1.2.3


From d0c7dc03442f7cbd8740d9a4e7a4e7f84bfa676d Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:06 -0600
Subject: KVM: ppc: split out common Book E instruction emulation

The Book E code will be shared with e500.

I've left PID in kvmppc_core_emulate_op() just so that we don't need to move
kvmppc_set_pid() right now. Once we have the e500 implementation, we can
probably share that too.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/44x_emulate.c   | 217 +++-----------------------------
 arch/powerpc/kvm/Makefile        |   1 +
 arch/powerpc/kvm/booke.h         |   6 +
 arch/powerpc/kvm/booke_emulate.c | 262 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 284 insertions(+), 202 deletions(-)
 create mode 100644 arch/powerpc/kvm/booke_emulate.c

(limited to 'arch')

diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 82489a743a6..61af58fcece 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,25 +27,12 @@
 #include "booke.h"
 #include "44x_tlb.h"
 
-#define OP_RFI      19
-
-#define XOP_RFI     50
-#define XOP_MFMSR   83
-#define XOP_WRTEE   131
-#define XOP_MTMSR   146
-#define XOP_WRTEEI  163
 #define XOP_MFDCR   323
 #define XOP_MTDCR   451
 #define XOP_TLBSX   914
 #define XOP_ICCCI   966
 #define XOP_TLBWE   978
 
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pc = vcpu->arch.srr0;
-	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int ws;
 
 	switch (get_op(inst)) {
-	case OP_RFI:
-		switch (get_xop(inst)) {
-		case XOP_RFI:
-			kvmppc_emul_rfi(vcpu);
-			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
-			*advance = 0;
-			break;
-
-		default:
-			emulated = EMULATE_FAIL;
-			break;
-		}
-		break;
-
 	case 31:
 		switch (get_xop(inst)) {
 
-		case XOP_MFMSR:
-			rt = get_rt(inst);
-			vcpu->arch.gpr[rt] = vcpu->arch.msr;
-			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
-			break;
-
-		case XOP_MTMSR:
-			rs = get_rs(inst);
-			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
-			break;
-
-		case XOP_WRTEE:
-			rs = get_rs(inst);
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-							 | (vcpu->arch.gpr[rs] & MSR_EE);
-			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
-			break;
-
-		case XOP_WRTEEI:
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-							 | (inst & MSR_EE);
-			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
-			break;
-
 		case XOP_MFDCR:
 			dcrn = get_dcrn(inst);
 			rt = get_rt(inst);
@@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		emulated = EMULATE_FAIL;
 	}
 
+	if (emulated == EMULATE_FAIL)
+		emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
 	return emulated;
 }
 
 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
+	int emulated = EMULATE_DONE;
+
 	switch (sprn) {
-	case SPRN_MMUCR:
-		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
 	case SPRN_PID:
 		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+	case SPRN_MMUCR:
+		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
 	case SPRN_CCR0:
 		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
 	case SPRN_CCR1:
 		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
-	case SPRN_DEAR:
-		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
-	case SPRN_ESR:
-		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
-	case SPRN_DBCR0:
-		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
-	case SPRN_DBCR1:
-		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
-	case SPRN_TSR:
-		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-	case SPRN_TCR:
-		vcpu->arch.tcr = vcpu->arch.gpr[rs];
-		kvmppc_emulate_dec(vcpu);
-		break;
-
-	/* Note: SPRG4-7 are user-readable. These values are
-	 * loaded into the real SPRGs when resuming the
-	 * guest. */
-	case SPRN_SPRG4:
-		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
-	case SPRN_SPRG5:
-		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
-	case SPRN_SPRG6:
-		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
-	case SPRN_SPRG7:
-		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
-	case SPRN_IVPR:
-		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR0:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR1:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR2:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR3:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR4:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR5:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR6:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR7:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR8:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR9:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR10:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR11:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR12:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR13:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR14:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
-		break;
-	case SPRN_IVOR15:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
-		break;
-
 	default:
-		return EMULATE_FAIL;
+		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
 
 	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
-	return EMULATE_DONE;
+	return emulated;
 }
 
 int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 {
+	int emulated = EMULATE_DONE;
+
 	switch (sprn) {
-	/* 440 */
+	case SPRN_PID:
+		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
 	case SPRN_MMUCR:
 		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
 	case SPRN_CCR0:
 		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
 	case SPRN_CCR1:
 		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
-
-	/* Book E */
-	case SPRN_PID:
-		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
-	case SPRN_IVPR:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
-	case SPRN_DEAR:
-		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
-	case SPRN_ESR:
-		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
-	case SPRN_DBCR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
-	case SPRN_DBCR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
-
-	case SPRN_IVOR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
-		break;
-	case SPRN_IVOR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
-		break;
-	case SPRN_IVOR2:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
-		break;
-	case SPRN_IVOR3:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
-		break;
-	case SPRN_IVOR4:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
-		break;
-	case SPRN_IVOR5:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
-		break;
-	case SPRN_IVOR6:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
-		break;
-	case SPRN_IVOR7:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
-		break;
-	case SPRN_IVOR8:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
-		break;
-	case SPRN_IVOR9:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
-		break;
-	case SPRN_IVOR10:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
-		break;
-	case SPRN_IVOR11:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
-		break;
-	case SPRN_IVOR12:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
-		break;
-	case SPRN_IVOR13:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
-		break;
-	case SPRN_IVOR14:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
-		break;
-	case SPRN_IVOR15:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
-		break;
-
 	default:
-		return EMULATE_FAIL;
+		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
 
 	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
-	return EMULATE_DONE;
+	return emulated;
 }
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index df7ba59e6d5..3ef5261828e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -16,6 +16,7 @@ AFLAGS_booke_interrupts.o := -I$(obj)
 
 kvm-440-objs := \
 	booke.o \
+	booke_emulate.o \
 	booke_interrupts.o \
 	44x.o \
 	44x_tlb.o \
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index cf7c94ca24b..311fdbc23fb 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
 #include "timing.h"
 
 /* interrupt priortity ordering */
@@ -57,4 +58,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 	};
 }
 
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+
 #endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 00000000000..8aa78f1a1f8
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,262 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI     50
+
+#define OP_31_XOP_MFMSR   83
+#define OP_31_XOP_WRTEE   131
+#define OP_31_XOP_MTMSR   146
+#define OP_31_XOP_WRTEEI  163
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int rs;
+	int rt;
+
+	switch (get_op(inst)) {
+	case 19:
+		switch (get_xop(inst)) {
+		case OP_19_XOP_RFI:
+			kvmppc_emul_rfi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+			*advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case OP_31_XOP_MFMSR:
+			rt = get_rt(inst);
+			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+			break;
+
+		case OP_31_XOP_MTMSR:
+			rs = get_rs(inst);
+			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			break;
+
+		case OP_31_XOP_WRTEE:
+			rs = get_rs(inst);
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (vcpu->arch.gpr[rs] & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		case OP_31_XOP_WRTEEI:
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (inst & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_DEAR:
+		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+	case SPRN_ESR:
+		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR0:
+		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR1:
+		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_TSR:
+		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+	case SPRN_TCR:
+		vcpu->arch.tcr = vcpu->arch.gpr[rs];
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	/* Note: SPRG4-7 are user-readable. These values are
+	 * loaded into the real SPRGs when resuming the
+	 * guest. */
+	case SPRN_SPRG4:
+		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG5:
+		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG6:
+		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG7:
+		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR0:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_IVPR:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+	case SPRN_DEAR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+	case SPRN_ESR:
+		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+	case SPRN_DBCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+	case SPRN_DBCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+	case SPRN_IVOR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
-- 
cgit v1.2.3


From f7b200af8f1da748cc67993caeff3923d6014070 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:07 -0600
Subject: KVM: ppc: Add dbsr in kvm_vcpu_arch

Kernel for E500 need clear dbsr when startup.
So add dbsr register in kvm_vcpu_arch for BOOKE.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h | 1 +
 arch/powerpc/kvm/booke_emulate.c    | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 65c4d492d72..50e5ce1b400 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -163,6 +163,7 @@ struct kvm_vcpu_arch {
 	u32 ccr1;
 	u32 dbcr0;
 	u32 dbcr1;
+	u32 dbsr;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	struct kvmppc_exit_timing timing_exit;
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 8aa78f1a1f8..aebc65e93f4 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -111,6 +111,8 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
 	case SPRN_DBCR1:
 		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBSR:
+		vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
 	case SPRN_TSR:
 		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
 	case SPRN_TCR:
@@ -204,6 +206,8 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
 	case SPRN_DBCR1:
 		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+	case SPRN_DBSR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
 
 	case SPRN_IVOR0:
 		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
-- 
cgit v1.2.3


From 366d4b9b9fdda282006b97316f8038cd36f8ecb7 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:08 -0600
Subject: KVM: ppc: No need to include core-header for KVM in asm-offsets.c
 currently

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kernel/asm-offsets.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491e9e2..42fe4da4e8a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -49,7 +49,7 @@
 #include <asm/iseries/alpaca.h>
 #endif
 #ifdef CONFIG_KVM
-#include <asm/kvm_44x.h>
+#include <linux/kvm_host.h>
 #endif
 
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -361,8 +361,6 @@ int main(void)
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
-	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
-
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
-- 
cgit v1.2.3


From 17c885eb5c38f39a2bb3143a67687bd905dcd0fa Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:09 -0600
Subject: KVM: ppc: ifdef iccci with CONFIG_44x

E500 deosn't support this instruction.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/booke_interrupts.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 084ebcd7dd8..4679ec287e6 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -347,7 +347,9 @@ lightweight_exit:
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
+#ifdef CONFIG_44x
 	iccci	0, 0 /* XXX hack */
+#endif
 
 	/* Load some guest volatiles. */
 	lwz	r0, VCPU_GPR(r0)(r4)
-- 
cgit v1.2.3


From bc8080cbcc8870178f0910edd537d0cb5706d703 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:10 -0600
Subject: KVM: ppc: E500 core-specific code

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_e500.h |  67 ++++
 arch/powerpc/kvm/Kconfig            |  13 +
 arch/powerpc/kvm/Makefile           |   9 +
 arch/powerpc/kvm/e500.c             | 151 ++++++++
 arch/powerpc/kvm/e500_emulate.c     | 169 +++++++++
 arch/powerpc/kvm/e500_tlb.c         | 737 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/e500_tlb.h         | 184 +++++++++
 7 files changed, 1330 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_e500.h
 create mode 100644 arch/powerpc/kvm/e500.c
 create mode 100644 arch/powerpc/kvm/e500_emulate.c
 create mode 100644 arch/powerpc/kvm/e500_tlb.c
 create mode 100644 arch/powerpc/kvm/e500_tlb.h

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
new file mode 100644
index 00000000000..9d497ce4972
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_KVM_E500_H__
+#define __ASM_KVM_E500_H__
+
+#include <linux/kvm_host.h>
+
+#define BOOKE_INTERRUPT_SIZE 36
+
+#define E500_PID_NUM   3
+#define E500_TLB_NUM   2
+
+struct tlbe{
+	u32 mas1;
+	u32 mas2;
+	u32 mas3;
+	u32 mas7;
+};
+
+struct kvmppc_vcpu_e500 {
+	/* Unmodified copy of the guest's TLB. */
+	struct tlbe *guest_tlb[E500_TLB_NUM];
+	/* TLB that's actually used when the guest is running. */
+	struct tlbe *shadow_tlb[E500_TLB_NUM];
+	/* Pages which are referenced in the shadow TLB. */
+	struct page **shadow_pages[E500_TLB_NUM];
+
+	unsigned int guest_tlb_size[E500_TLB_NUM];
+	unsigned int shadow_tlb_size[E500_TLB_NUM];
+	unsigned int guest_tlb_nv[E500_TLB_NUM];
+
+	u32 host_pid[E500_PID_NUM];
+	u32 pid[E500_PID_NUM];
+
+	u32 mas0;
+	u32 mas1;
+	u32 mas2;
+	u32 mas3;
+	u32 mas4;
+	u32 mas5;
+	u32 mas6;
+	u32 mas7;
+	u32 l1csr1;
+	u32 hid0;
+	u32 hid1;
+
+	struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6dbdc4817d8..14657055014 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -43,6 +43,19 @@ config KVM_EXIT_TIMING
 
 	  If unsure, say N.
 
+config KVM_E500
+	bool "KVM support for PowerPC E500 processors"
+	depends on EXPERIMENTAL && E500
+	select KVM
+	---help---
+	  Support running unmodified E500 guest kernels in virtual machines on
+	  E500 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
 config KVM_TRACE
 	bool "KVM trace support"
 	depends on KVM && MARKERS && SYSFS
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3ef5261828e..4b2df66c79d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -22,3 +22,12 @@ kvm-440-objs := \
 	44x_tlb.o \
 	44x_emulate.o
 obj-$(CONFIG_KVM_440) += kvm-440.o
+
+kvm-e500-objs := \
+	booke.o \
+	booke_emulate.o \
+	booke_interrupts.o \
+	e500.o \
+	e500_tlb.o \
+	e500_emulate.o
+obj-$(CONFIG_KVM_E500) += kvm-e500.o
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 00000000000..7992da497cd
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_e500.h>
+#include <asm/kvm_ppc.h>
+
+#include "e500_tlb.h"
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_e500_tlb_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_e500_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	int r;
+
+	if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0)
+		r = 0;
+	else
+		r = -ENOTSUPP;
+
+	return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	kvmppc_e500_tlb_setup(vcpu_e500);
+
+	/* Use the same core vertion as host's */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+	return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+	if (index < 0) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_e500) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_e500->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	err = kvmppc_e500_tlb_init(vcpu_e500);
+	if (err)
+		goto uninit_vcpu;
+
+	return vcpu;
+
+uninit_vcpu:
+	kvm_vcpu_uninit(vcpu);
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+	return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	kvmppc_e500_tlb_uninit(vcpu_e500);
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+static int kvmppc_e500_init(void)
+{
+	int r;
+
+	r = kvmppc_booke_init();
+	if (r)
+		return r;
+
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+}
+
+static void kvmppc_e500_exit(void)
+{
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 00000000000..a47f44cd2cf
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_e500.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX   914
+#define XOP_TLBRE   946
+#define XOP_TLBWE   978
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int ra;
+	int rb;
+	int rs;
+	int rt;
+
+	switch (get_op(inst)) {
+	case 31:
+		switch (get_xop(inst)) {
+
+		case XOP_TLBRE:
+			emulated = kvmppc_e500_emul_tlbre(vcpu);
+			break;
+
+		case XOP_TLBWE:
+			emulated = kvmppc_e500_emul_tlbwe(vcpu);
+			break;
+
+		case XOP_TLBSX:
+			rb = get_rb(inst);
+			emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+			break;
+
+		case XOP_TLBIVAX:
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+			emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	if (emulated == EMULATE_FAIL)
+		emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_PID:
+		vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+			vcpu->arch.pid = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_PID1:
+		vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+	case SPRN_PID2:
+		vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS0:
+		vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS1:
+		vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS2:
+		vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS3:
+		vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS4:
+		vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS6:
+		vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+	case SPRN_MAS7:
+		vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+	case SPRN_L1CSR1:
+		vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_HID0:
+		vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_HID1:
+		vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+
+	default:
+		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_PID:
+		vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+	case SPRN_PID1:
+		vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+	case SPRN_PID2:
+		vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+	case SPRN_MAS0:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+	case SPRN_MAS1:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+	case SPRN_MAS2:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+	case SPRN_MAS3:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+	case SPRN_MAS4:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+	case SPRN_MAS6:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+	case SPRN_MAS7:
+		vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+
+	case SPRN_TLB0CFG:
+		vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
+		vcpu->arch.gpr[rt] &= ~0xfffUL;
+		vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
+		break;
+
+	case SPRN_TLB1CFG:
+		vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
+		vcpu->arch.gpr[rt] &= ~0xfffUL;
+		vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
+		break;
+
+	case SPRN_L1CSR1:
+		vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+	case SPRN_HID0:
+		vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+	case SPRN_HID1:
+		vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+
+	default:
+		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+	}
+
+	return emulated;
+}
+
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644
index 00000000000..6a50340f575
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_e500.h>
+
+#include "e500_tlb.h"
+
+#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+
+static unsigned int tlb1_entry_num;
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct tlbe *tlbe;
+	int i, tlbsel;
+
+	printk("| %8s | %8s | %8s | %8s | %8s |\n",
+			"nr", "mas1", "mas2", "mas3", "mas7");
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		printk("Guest TLB%d:\n", tlbsel);
+		for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+			tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+			if (tlbe->mas1 & MAS1_VALID)
+				printk(" G[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
+					tlbsel, i, tlbe->mas1, tlbe->mas2,
+					tlbe->mas3, tlbe->mas7);
+		}
+	}
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		printk("Shadow TLB%d:\n", tlbsel);
+		for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
+			tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
+			if (tlbe->mas1 & MAS1_VALID)
+				printk(" S[%d][%3d] |  %08X | %08X | %08X | %08X |\n",
+					tlbsel, i, tlbe->mas1, tlbe->mas2,
+					tlbe->mas3, tlbe->mas7);
+		}
+	}
+}
+
+static inline unsigned int tlb0_get_next_victim(
+		struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	unsigned int victim;
+
+	victim = vcpu_e500->guest_tlb_nv[0]++;
+	if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+		vcpu_e500->guest_tlb_nv[0] = 0;
+
+	return victim;
+}
+
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+	return tlb1_entry_num - tlbcam_index;
+}
+
+static inline int tlbe_is_writable(struct tlbe *tlbe)
+{
+	return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+}
+
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+	/* Mask off reserved bits. */
+	mas3 &= MAS3_ATTRIB_MASK;
+
+	if (!usermode) {
+		/* Guest is in supervisor mode,
+		 * so we need to translate guest
+		 * supervisor permissions into user permissions. */
+		mas3 &= ~E500_TLB_USER_PERM_MASK;
+		mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
+	}
+
+	return mas3 | E500_TLB_SUPER_PERM_MASK;
+}
+
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+{
+	return mas2 & MAS2_ATTRIB_MASK;
+}
+
+/*
+ * writing shadow tlb entry to host TLB
+ */
+static inline void __write_host_tlbe(struct tlbe *stlbe)
+{
+	mtspr(SPRN_MAS1, stlbe->mas1);
+	mtspr(SPRN_MAS2, stlbe->mas2);
+	mtspr(SPRN_MAS3, stlbe->mas3);
+	mtspr(SPRN_MAS7, stlbe->mas7);
+	__asm__ __volatile__ ("tlbwe\n" : : );
+}
+
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+	local_irq_disable();
+	if (tlbsel == 0) {
+		__write_host_tlbe(stlbe);
+	} else {
+		unsigned register mas0;
+
+		mas0 = mfspr(SPRN_MAS0);
+
+		mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
+		__write_host_tlbe(stlbe);
+
+		mtspr(SPRN_MAS0, mas0);
+	}
+	local_irq_enable();
+}
+
+void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int i;
+	unsigned register mas0;
+
+	/* Load all valid TLB1 entries to reduce guest tlb miss fault */
+	local_irq_disable();
+	mas0 = mfspr(SPRN_MAS0);
+	for (i = 0; i < tlb1_max_shadow_size(); i++) {
+		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+
+		if (get_tlb_v(stlbe)) {
+			mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
+					| MAS0_ESEL(to_htlb1_esel(i)));
+			__write_host_tlbe(stlbe);
+		}
+	}
+	mtspr(SPRN_MAS0, mas0);
+	local_irq_enable();
+}
+
+void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
+{
+	_tlbia();
+}
+
+/* Search the guest TLB for a matching entry. */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+	int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+		struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+		unsigned int tid;
+
+		if (eaddr < get_tlb_eaddr(tlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(tlbe))
+			continue;
+
+		tid = get_tlb_tid(tlbe);
+		if (tid && (tid != pid))
+			continue;
+
+		if (!get_tlb_v(tlbe))
+			continue;
+
+		if (get_tlb_ts(tlbe) != as && as != -1)
+			continue;
+
+		return i;
+	}
+
+	return -1;
+}
+
+static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+	struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
+
+	if (page) {
+		vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
+
+		if (get_tlb_v(stlbe)) {
+			if (tlbe_is_writable(stlbe))
+				kvm_release_page_dirty(page);
+			else
+				kvm_release_page_clean(page);
+		}
+	}
+}
+
+static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+	stlbe->mas1 = 0;
+	KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+			handler);
+}
+
+static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, gva_t eend, u32 tid)
+{
+	unsigned int pid = tid & 0xff;
+	unsigned int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
+		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+		unsigned int tid;
+
+		if (!get_tlb_v(stlbe))
+			continue;
+
+		if (eend < get_tlb_eaddr(stlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(stlbe))
+			continue;
+
+		tid = get_tlb_tid(stlbe);
+		if (tid && (tid != pid))
+			continue;
+
+		kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+		write_host_tlbe(vcpu_e500, 1, i);
+	}
+}
+
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+		unsigned int eaddr, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int victim, pidsel, tsized;
+	int tlbsel;
+
+	/* since we only have tow TLBs, only lower bit is used. */
+	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
+	tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+
+	vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+		| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+	vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+		| MAS1_TID(vcpu_e500->pid[pidsel])
+		| MAS1_TSIZE(tsized);
+	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
+		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
+	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
+		| (get_cur_pid(vcpu) << 16)
+		| (as ? MAS6_SAS : 0);
+	vcpu_e500->mas7 = 0;
+}
+
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+	u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+{
+	struct page *new_page;
+	struct tlbe *stlbe;
+	hpa_t hpaddr;
+
+	stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+	/* Get reference to new page. */
+	new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+	if (is_error_page(new_page)) {
+		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+		kvm_release_page_clean(new_page);
+		return;
+	}
+	hpaddr = page_to_phys(new_page);
+
+	/* Drop reference to old page. */
+	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+
+	vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+
+	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
+	stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+	stlbe->mas2 = (gvaddr & MAS2_EPN)
+		| e500_shadow_mas2_attrib(gtlbe->mas2,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe->mas3 = (hpaddr & MAS3_RPN)
+		| e500_shadow_mas3_attrib(gtlbe->mas3,
+				vcpu_e500->vcpu.arch.msr & MSR_PR);
+	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+	KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+			stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+			handler);
+}
+
+/* XXX only map the one-one case, for now use TLB0 */
+static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *gtlbe;
+
+	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+			gtlbe, tlbsel, esel);
+
+	return esel;
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+/* XXX for both one-one and one-to-many , for now use TLB1 */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
+{
+	unsigned int victim;
+
+	victim = vcpu_e500->guest_tlb_nv[1]++;
+
+	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
+		vcpu_e500->guest_tlb_nv[1] = 0;
+
+	kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
+
+	return victim;
+}
+
+/* Invalidate all guest kernel mappings when enter usermode,
+ * so that when they fault back in they will get the
+ * proper permission bits. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+	if (usermode) {
+		struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+		int i;
+
+		/* XXX Replace loop with fancy data structures. */
+		/* needn't set modified since tlbia will make TLB1 coherent */
+		for (i = 0; i < tlb1_max_shadow_size(); i++)
+			kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+
+		_tlbia();
+	}
+}
+
+static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+	if (unlikely(get_tlb_iprot(gtlbe)))
+		return -1;
+
+	if (tlbsel == 1) {
+		kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
+				get_tlb_end(gtlbe),
+				get_tlb_tid(gtlbe));
+	} else {
+		kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
+	}
+
+	gtlbe->mas1 = 0;
+
+	return 0;
+}
+
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int ia;
+	int esel, tlbsel;
+	gva_t ea;
+
+	ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+
+	ia = (ea >> 2) & 0x1;
+
+	/* since we only have tow TLBs, only lower bit is used. */
+	tlbsel = (ea >> 3) & 0x1;
+
+	if (ia) {
+		/* invalidate all entries */
+		for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	} else {
+		ea &= 0xfffff000;
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+				get_cur_pid(vcpu), -1);
+		if (esel >= 0)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	}
+
+	_tlbia();
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel, esel;
+	struct tlbe *gtlbe;
+
+	tlbsel = get_tlb_tlbsel(vcpu_e500);
+	esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+	vcpu_e500->mas0 &= MAS0_NV(0);
+	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+	vcpu_e500->mas1 = gtlbe->mas1;
+	vcpu_e500->mas2 = gtlbe->mas2;
+	vcpu_e500->mas3 = gtlbe->mas3;
+	vcpu_e500->mas7 = gtlbe->mas7;
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int as = !!get_cur_sas(vcpu_e500);
+	unsigned int pid = get_cur_spid(vcpu_e500);
+	int esel, tlbsel;
+	struct tlbe *gtlbe = NULL;
+	gva_t ea;
+
+	ea = vcpu->arch.gpr[rb];
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+		if (esel >= 0) {
+			gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+			break;
+		}
+	}
+
+	if (gtlbe) {
+		vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+			| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+		vcpu_e500->mas1 = gtlbe->mas1;
+		vcpu_e500->mas2 = gtlbe->mas2;
+		vcpu_e500->mas3 = gtlbe->mas3;
+		vcpu_e500->mas7 = gtlbe->mas7;
+	} else {
+		int victim;
+
+		/* since we only have tow TLBs, only lower bit is used. */
+		tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
+		victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+
+		vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+			| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+		vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
+			| (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
+		vcpu_e500->mas2 &= MAS2_EPN;
+		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
+		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+		vcpu_e500->mas7 = 0;
+	}
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	u64 eaddr;
+	u64 raddr;
+	u32 tid;
+	struct tlbe *gtlbe;
+	int tlbsel, esel, stlbsel, sesel;
+
+	tlbsel = get_tlb_tlbsel(vcpu_e500);
+	esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+	if (get_tlb_v(gtlbe) && tlbsel == 1) {
+		eaddr = get_tlb_eaddr(gtlbe);
+		tid = get_tlb_tid(gtlbe);
+		kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
+				get_tlb_end(gtlbe), tid);
+	}
+
+	gtlbe->mas1 = vcpu_e500->mas1;
+	gtlbe->mas2 = vcpu_e500->mas2;
+	gtlbe->mas3 = vcpu_e500->mas3;
+	gtlbe->mas7 = vcpu_e500->mas7;
+
+	KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
+			gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
+			handler);
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+	if (tlbe_is_host_safe(vcpu, gtlbe)) {
+		switch (tlbsel) {
+		case 0:
+			/* TLB0 */
+			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+			gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+
+			stlbsel = 0;
+			sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+
+			break;
+
+		case 1:
+			/* TLB1 */
+			eaddr = get_tlb_eaddr(gtlbe);
+			raddr = get_tlb_raddr(gtlbe);
+
+			/* Create a 4KB mapping on the host.
+			 * If the guest wanted a large page,
+			 * only the first 4KB is mapped here and the rest
+			 * are mapped on the fly. */
+			stlbsel = 1;
+			sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
+					raddr >> PAGE_SHIFT, gtlbe);
+			break;
+
+		default:
+			BUG();
+		}
+		write_host_tlbe(vcpu_e500, stlbsel, sesel);
+	}
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+	return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+	return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+			gva_t eaddr)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct tlbe *gtlbe =
+		&vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
+	u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+
+	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel, i;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++)
+		for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
+			kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
+
+	/* discard all guest mapping */
+	_tlbia();
+}
+
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+			unsigned int index)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel = tlbsel_of(index);
+	int esel = esel_of(index);
+	int stlbsel, sesel;
+
+	switch (tlbsel) {
+	case 0:
+		stlbsel = 0;
+		sesel = esel;
+		break;
+
+	case 1: {
+		gfn_t gfn = gpaddr >> PAGE_SHIFT;
+		struct tlbe *gtlbe
+			= &vcpu_e500->guest_tlb[tlbsel][esel];
+
+		stlbsel = 1;
+		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
+		break;
+	}
+
+	default:
+		BUG();
+		break;
+	}
+	write_host_tlbe(vcpu_e500, stlbsel, sesel);
+}
+
+int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+				gva_t eaddr, unsigned int pid, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int esel, tlbsel;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+		if (esel >= 0)
+			return index_of(tlbsel, esel);
+	}
+
+	return -1;
+}
+
+void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct tlbe *tlbe;
+
+	/* Insert large initial mapping for guest. */
+	tlbe = &vcpu_e500->guest_tlb[1][0];
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+	tlbe->mas2 = 0;
+	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
+	tlbe->mas7 = 0;
+
+	/* 4K map for serial output. Used by kernel wrapper. */
+	tlbe = &vcpu_e500->guest_tlb[1][1];
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+	tlbe->mas7 = 0;
+}
+
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
+
+	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
+	vcpu_e500->guest_tlb[0] =
+		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+	if (vcpu_e500->guest_tlb[0] == NULL)
+		goto err_out;
+
+	vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
+	vcpu_e500->shadow_tlb[0] =
+		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+	if (vcpu_e500->shadow_tlb[0] == NULL)
+		goto err_out_guest0;
+
+	vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
+	vcpu_e500->guest_tlb[1] =
+		kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
+	if (vcpu_e500->guest_tlb[1] == NULL)
+		goto err_out_shadow0;
+
+	vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
+	vcpu_e500->shadow_tlb[1] =
+		kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
+	if (vcpu_e500->shadow_tlb[1] == NULL)
+		goto err_out_guest1;
+
+	vcpu_e500->shadow_pages[0] = (struct page **)
+		kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+	if (vcpu_e500->shadow_pages[0] == NULL)
+		goto err_out_shadow1;
+
+	vcpu_e500->shadow_pages[1] = (struct page **)
+		kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
+	if (vcpu_e500->shadow_pages[1] == NULL)
+		goto err_out_page0;
+
+	return 0;
+
+err_out_page0:
+	kfree(vcpu_e500->shadow_pages[0]);
+err_out_shadow1:
+	kfree(vcpu_e500->shadow_tlb[1]);
+err_out_guest1:
+	kfree(vcpu_e500->guest_tlb[1]);
+err_out_shadow0:
+	kfree(vcpu_e500->shadow_tlb[0]);
+err_out_guest0:
+	kfree(vcpu_e500->guest_tlb[0]);
+err_out:
+	return -1;
+}
+
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	kfree(vcpu_e500->shadow_pages[1]);
+	kfree(vcpu_e500->shadow_pages[0]);
+	kfree(vcpu_e500->shadow_tlb[1]);
+	kfree(vcpu_e500->guest_tlb[1]);
+	kfree(vcpu_e500->shadow_tlb[0]);
+	kfree(vcpu_e500->guest_tlb[0]);
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644
index 00000000000..d8833f95516
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __KVM_E500_TLB_H__
+#define __KVM_E500_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-fsl-booke.h>
+#include <asm/tlb.h>
+#include <asm/kvm_e500.h>
+
+#define KVM_E500_TLB0_WAY_SIZE_BIT	7	/* Fixed */
+#define KVM_E500_TLB0_WAY_SIZE		(1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
+#define KVM_E500_TLB0_WAY_SIZE_MASK	(KVM_E500_TLB0_WAY_SIZE - 1)
+
+#define KVM_E500_TLB0_WAY_NUM_BIT	1	/* No greater than 7 */
+#define KVM_E500_TLB0_WAY_NUM		(1UL << KVM_E500_TLB0_WAY_NUM_BIT)
+#define KVM_E500_TLB0_WAY_NUM_MASK	(KVM_E500_TLB0_WAY_NUM - 1)
+
+#define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE  16
+
+#define index_of(tlbsel, esel)	(((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index)	((index) >> 16)
+#define esel_of(index)		((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+	  (MAS2_X0 | MAS2_X1 | MAS2_W | MAS2_I | MAS2_M | MAS2_G | MAS2_E)
+#define MAS3_ATTRIB_MASK \
+	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
+extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
+extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
+extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+
+/* TLB helper functions */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+	return (tlbe->mas1 >> 8) & 0xf;
+}
+
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+	return tlbe->mas2 & 0xfffff000;
+}
+
+static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+{
+	unsigned int pgsize = get_tlb_size(tlbe);
+	return 1ULL << 10 << (pgsize << 1);
+}
+
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+	u64 bytes = get_tlb_bytes(tlbe);
+	return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+	u64 rpn = tlbe->mas7;
+	return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+}
+
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+	return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+	return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+	return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+{
+	return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_spid(
+		const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	return (vcpu_e500->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(
+		const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	return vcpu_e500->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(
+		const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	/*
+	 * Manual says that tlbsel has 2 bits wide.
+	 * Since we only have tow TLBs, only lower bit is used.
+	 */
+	return (vcpu_e500->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(
+		const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	return vcpu_e500->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(
+		const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	return (vcpu_e500->mas0 >> 16) & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel(
+		const struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel)
+{
+	unsigned int esel = get_tlb_esel_bit(vcpu_e500);
+
+	if (tlbsel == 0) {
+		esel &= KVM_E500_TLB0_WAY_NUM_MASK;
+		esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
+				<< KVM_E500_TLB0_WAY_NUM_BIT;
+	} else {
+		esel &= KVM_E500_TLB1_SIZE - 1;
+	}
+
+	return esel;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+			const struct tlbe *tlbe)
+{
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+#endif /* __KVM_E500_TLB_H__ */
-- 
cgit v1.2.3


From b52a638c391c5c7b013180f5374274698b8535c8 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:11 -0600
Subject: KVM: ppc: Add kvmppc_mmu_dtlb/itlb_miss for booke

When itlb or dtlb miss happens, E500 needs to update some mmu registers.
So that the auto-load mechanism can work on E500 when write a tlb entry.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_ppc.h | 2 ++
 arch/powerpc/kvm/44x_tlb.c         | 8 ++++++++
 arch/powerpc/kvm/booke.c           | 2 ++
 3 files changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5e80a20f32b..6052779dbb5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -63,6 +63,8 @@ extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
                               gva_t eaddr);
+extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index e67b7313ffc..4a16f472cc1 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -232,6 +232,14 @@ int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
 static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
                                       unsigned int stlb_index)
 {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a73b3959e41..933c406c791 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -295,6 +295,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_mmu_dtlb_miss(vcpu);
 			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
@@ -337,6 +338,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+			kvmppc_mmu_itlb_miss(vcpu);
 			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
 			break;
 		}
-- 
cgit v1.2.3


From bdc89f13ec955c14777d5caf02dfca3f51d639bd Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:12 -0600
Subject: KVM: ppc: distinguish between interrupts and priorities

Although BOOKE_MAX_INTERRUPT has the right value, the meaning is not match.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/booke.c | 2 +-
 arch/powerpc/kvm/booke.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 933c406c791..f192fbee2f2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -163,7 +163,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 	unsigned int priority;
 
 	priority = __ffs(*pending);
-	while (priority <= BOOKE_MAX_INTERRUPT) {
+	while (priority <= BOOKE_IRQPRIO_MAX) {
 		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
 			break;
 
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 311fdbc23fb..7ceeb3e739b 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -42,6 +42,7 @@
 #define BOOKE_IRQPRIO_EXTERNAL 13
 #define BOOKE_IRQPRIO_FIT 14
 #define BOOKE_IRQPRIO_DECREMENTER 15
+#define BOOKE_IRQPRIO_MAX 15
 
 /* Helper function for "full" MSR writes. No need to call this if only EE is
  * changing. */
-- 
cgit v1.2.3


From bb3a8a178dec1e46df3138a30f76acf67fe12397 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sat, 3 Jan 2009 16:23:13 -0600
Subject: KVM: ppc: Add extra E500 exceptions

e500 has additional interrupt vectors (and corresponding IVORs) for SPE and
performance monitoring interrupts.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_asm.h  |  7 ++++++-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/booke.c            | 18 ++++++++++++++++++
 arch/powerpc/kvm/booke.h            | 30 ++++++++++++++++++------------
 arch/powerpc/kvm/booke_interrupts.S |  3 +++
 arch/powerpc/kvm/e500.c             | 20 +++++++++++++++++++-
 arch/powerpc/kvm/e500_emulate.c     | 27 +++++++++++++++++++++++++++
 7 files changed, 92 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 2197764796d..56bfae59837 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -42,7 +42,12 @@
 #define BOOKE_INTERRUPT_DTLB_MISS 13
 #define BOOKE_INTERRUPT_ITLB_MISS 14
 #define BOOKE_INTERRUPT_DEBUG 15
-#define BOOKE_MAX_INTERRUPT 15
+
+/* E500 */
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50e5ce1b400..1c618769752 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -150,7 +150,7 @@ struct kvm_vcpu_arch {
 	u32 tbu;
 	u32 tcr;
 	u32 tsr;
-	u32 ivor[16];
+	u32 ivor[64];
 	ulong ivpr;
 	u32 pir;
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index f192fbee2f2..642e4204cf2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -118,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	case BOOKE_IRQPRIO_DATA_STORAGE:
 	case BOOKE_IRQPRIO_INST_STORAGE:
 	case BOOKE_IRQPRIO_FP_UNAVAIL:
+	case BOOKE_IRQPRIO_SPE_UNAVAIL:
+	case BOOKE_IRQPRIO_SPE_FP_DATA:
+	case BOOKE_IRQPRIO_SPE_FP_ROUND:
 	case BOOKE_IRQPRIO_AP_UNAVAIL:
 	case BOOKE_IRQPRIO_ALIGNMENT:
 		allowed = 1;
@@ -261,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 
+	case BOOKE_INTERRUPT_SPE_UNAVAIL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SPE_FP_DATA:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SPE_FP_ROUND:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+		r = RESUME_GUEST;
+		break;
+
 	case BOOKE_INTERRUPT_DATA_STORAGE:
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		vcpu->arch.esr = vcpu->arch.fault_esr;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 7ceeb3e739b..d59bcca1f9d 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -31,18 +31,24 @@
 #define BOOKE_IRQPRIO_ALIGNMENT 2
 #define BOOKE_IRQPRIO_PROGRAM 3
 #define BOOKE_IRQPRIO_FP_UNAVAIL 4
-#define BOOKE_IRQPRIO_SYSCALL 5
-#define BOOKE_IRQPRIO_AP_UNAVAIL 6
-#define BOOKE_IRQPRIO_DTLB_MISS 7
-#define BOOKE_IRQPRIO_ITLB_MISS 8
-#define BOOKE_IRQPRIO_MACHINE_CHECK 9
-#define BOOKE_IRQPRIO_DEBUG 10
-#define BOOKE_IRQPRIO_CRITICAL 11
-#define BOOKE_IRQPRIO_WATCHDOG 12
-#define BOOKE_IRQPRIO_EXTERNAL 13
-#define BOOKE_IRQPRIO_FIT 14
-#define BOOKE_IRQPRIO_DECREMENTER 15
-#define BOOKE_IRQPRIO_MAX 15
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+#define BOOKE_IRQPRIO_MAX 19
+
+extern unsigned long kvmppc_booke_handlers;
 
 /* Helper function for "full" MSR writes. No need to call this if only EE is
  * changing. */
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 4679ec287e6..d0c6f841bbd 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
 KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
 
 _GLOBAL(kvmppc_handler_len)
 	.long kvmppc_handler_1 - kvmppc_handler_0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 7992da497cd..d8067fd81cd 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -21,6 +21,7 @@
 #include <asm/kvm_e500.h>
 #include <asm/kvm_ppc.h>
 
+#include "booke.h"
 #include "e500_tlb.h"
 
 void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
@@ -133,12 +134,29 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 
 static int kvmppc_e500_init(void)
 {
-	int r;
+	int r, i;
+	unsigned long ivor[3];
+	unsigned long max_ivor = 0;
 
 	r = kvmppc_booke_init();
 	if (r)
 		return r;
 
+	/* copy extra E500 exception handlers */
+	ivor[0] = mfspr(SPRN_IVOR32);
+	ivor[1] = mfspr(SPRN_IVOR33);
+	ivor[2] = mfspr(SPRN_IVOR34);
+	for (i = 0; i < 3; i++) {
+		if (ivor[i] > max_ivor)
+			max_ivor = ivor[i];
+
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
+		       kvmppc_handler_len);
+	}
+	flush_icache_range(kvmppc_booke_handlers,
+			kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
 }
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index a47f44cd2cf..d3c0c7c7f20 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -107,6 +107,20 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_HID1:
 		vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
 
+	/* extra exceptions */
+	case SPRN_IVOR32:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR33:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR34:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR35:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+		break;
+
 	default:
 		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
@@ -160,6 +174,19 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_HID1:
 		vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
 
+	/* extra exceptions */
+	case SPRN_IVOR32:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+		break;
+	case SPRN_IVOR33:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+		break;
+	case SPRN_IVOR34:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+		break;
+	case SPRN_IVOR35:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+		break;
 	default:
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
-- 
cgit v1.2.3


From 1872a3f411ffe95c8e92300e0986a3532db55ce9 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 23:26:52 +0200
Subject: KVM: VMX: Fix guest state validity checks

The vmx guest state validity checks are full of bugs.  Make them
conform to the manual.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3312047664a..be9441056ff 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1784,14 +1784,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	cs_rpl = cs.selector & SELECTOR_RPL_MASK;
 
+	if (cs.unusable)
+		return false;
 	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
-	} else if (cs.type & AR_TYPE_CODE_MASK) {
+	} else {
 		if (cs.dpl != cs_rpl)
 			return false;
 	}
@@ -1810,7 +1812,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 	ss_rpl = ss.selector & SELECTOR_RPL_MASK;
 
-	if ((ss.type != 3) || (ss.type != 7))
+	if (ss.unusable)
+		return true;
+	if (ss.type != 3 && ss.type != 7)
 		return false;
 	if (!ss.s)
 		return false;
@@ -1830,6 +1834,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 	vmx_get_segment(vcpu, &var, seg);
 	rpl = var.selector & SELECTOR_RPL_MASK;
 
+	if (var.unusable)
+		return true;
 	if (!var.s)
 		return false;
 	if (!var.present)
@@ -1851,9 +1857,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
 	vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
 
+	if (tr.unusable)
+		return false;
 	if (tr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
 		return false;
-	if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
 		return false;
 	if (!tr.present)
 		return false;
@@ -1867,6 +1875,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
 	vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
 
+	if (ldtr.unusable)
+		return true;
 	if (ldtr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
 		return false;
 	if (ldtr.type != 2)
-- 
cgit v1.2.3


From 9fd4a3b7a412f983696b23121413a79d2132fed6 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 23:43:42 +0200
Subject: KVM: VMX: don't clobber segment AR if emulating invalid state

The ususable bit is important for determining state validity; don't
clobber it.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index be9441056ff..a6598cbaa00 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1649,7 +1649,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->limit = vmcs_read32(sf->limit);
 	var->selector = vmcs_read16(sf->selector);
 	ar = vmcs_read32(sf->ar_bytes);
-	if (ar & AR_UNUSABLE_MASK)
+	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
 	var->s = (ar >> 4) & 1;
-- 
cgit v1.2.3


From 10f32d84c750ccf8c0afb3a4ea9d4059aa3e9ffc Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 5 Jan 2009 00:53:19 +0200
Subject: KVM: VMX: Prevent exit handler from running if emulating due to
 invalid state

If we've just emulated an instruction, we won't have any valid exit
reason and associated information.

Fix by moving the clearing of the emulation_required flag to the exit handler.
This way the exit handler can notice that we've been emulating and abort
early.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a6598cbaa00..a309be6788e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3130,7 +3130,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 				struct kvm_run *kvm_run)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int err;
 
 	preempt_enable();
@@ -3155,11 +3154,6 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 
 	local_irq_disable();
 	preempt_disable();
-
-	/* Guest state should be valid now except if we need to
-	 * emulate an MMIO */
-	if (guest_state_valid(vcpu))
-		vmx->emulation_required = 0;
 }
 
 /*
@@ -3208,8 +3202,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	/* If we need to emulate an MMIO from handle_invalid_guest_state
 	 * we just return 0 */
-	if (vmx->emulation_required && emulate_invalid_guest_state)
+	if (vmx->emulation_required && emulate_invalid_guest_state) {
+		if (guest_state_valid(vcpu))
+			vmx->emulation_required = 0;
 		return 0;
+	}
 
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
-- 
cgit v1.2.3


From 350f69dcd169d536307aa4a8c38c480e3a51c0db Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 5 Jan 2009 11:12:40 +0200
Subject: KVM: x86 emulator: Make emulate_pop() a little more generic

Allow emulate_pop() to read into arbitrary memory rather than just the
source operand.  Needed for complicated instructions like far returns.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 54fb09889a8..94459f313f1 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 }
 
 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
-		       struct x86_emulate_ops *ops)
+		       struct x86_emulate_ops *ops,
+		       void *dest, int len)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
 	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
 						 c->regs[VCPU_REGS_RSP]),
-				&c->src.val, c->src.bytes, ctxt->vcpu);
+				dest, len, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
 	return rc;
 }
 
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	c->src.bytes = c->dst.bytes;
-	rc = emulate_pop(ctxt, ops);
+	rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
 	if (rc != 0)
 		return rc;
-	c->dst.val = c->src.val;
 	return 0;
 }
 
@@ -1467,11 +1466,9 @@ special_insn:
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		c->src.bytes = c->op_bytes;
-		rc = emulate_pop(ctxt, ops);
+		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
 		if (rc != 0)
 			goto done;
-		c->dst.val = c->src.val;
 		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
-- 
cgit v1.2.3


From 8b3079a5c0c031de07c8390aa160a4229088274f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 5 Jan 2009 12:10:54 +0200
Subject: KVM: VMX: When emulating on invalid vmx state, don't return to
 userspace unnecessarily

If we aren't doing mmio there's no need to exit to userspace (which will
just be confused).

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a309be6788e..df454de8acf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
+	enum emulation_result invalid_state_emulation_result;
 
 	/* Support for vnmi-less CPUs */
 	int soft_vnmi_blocked;
@@ -3130,7 +3131,8 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 				struct kvm_run *kvm_run)
 {
-	int err;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	enum emulation_result err = EMULATE_DONE;
 
 	preempt_enable();
 	local_irq_enable();
@@ -3154,6 +3156,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 
 	local_irq_disable();
 	preempt_disable();
+
+	vmx->invalid_state_emulation_result = err;
 }
 
 /*
@@ -3205,7 +3209,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		if (guest_state_valid(vcpu))
 			vmx->emulation_required = 0;
-		return 0;
+		return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
 	}
 
 	/* Access CR3 don't cause VMExit in paging mode, so we need
-- 
cgit v1.2.3


From a77ab5ead5c1fef2c6c5a9b3cf3765e52643a2aa Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 5 Jan 2009 13:27:34 +0200
Subject: KVM: x86 emulator: implement 'ret far' instruction (opcode 0xcb)

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 94459f313f1..ca91749d208 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
 	0, ImplicitOps | Stack, 0, 0,
 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
 	/* 0xC8 - 0xCF */
-	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
 	/* 0xD0 - 0xD7 */
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1278,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
 	return 0;
 }
 
+static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
+			   struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+	unsigned long cs;
+
+	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+	if (rc)
+		return rc;
+	if (c->op_bytes == 4)
+		c->eip = (u32)c->eip;
+	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+	if (rc)
+		return rc;
+	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+	return rc;
+}
+
 static inline int writeback(struct x86_emulate_ctxt *ctxt,
 			    struct x86_emulate_ops *ops)
 {
@@ -1735,6 +1754,11 @@ special_insn:
 	mov:
 		c->dst.val = c->src.val;
 		break;
+	case 0xcb:		/* ret far */
+		rc = emulate_ret_far(ctxt, ops);
+		if (rc)
+			goto done;
+		break;
 	case 0xd0 ... 0xd1:	/* Grp2 */
 		c->src.val = 1;
 		emulate_grp2(ctxt);
-- 
cgit v1.2.3


From 269e05e48502f1cc06802e9fba90f5100dd6bb0d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 5 Jan 2009 15:21:42 +0200
Subject: KVM: Properly lock PIT creation

Otherwise, two threads can create a PIT in parallel and cause a memory leak.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8254.c | 2 --
 arch/x86/kvm/x86.c   | 6 ++++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 528daadeba4..69d1bbff3fd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,9 +548,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	if (!pit)
 		return NULL;
 
-	mutex_lock(&kvm->lock);
 	pit->irq_source_id = kvm_request_irq_source_id(kvm);
-	mutex_unlock(&kvm->lock);
 	if (pit->irq_source_id < 0) {
 		kfree(pit);
 		return NULL;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1f14611f4b..6fbc3460337 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1837,10 +1837,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	case KVM_CREATE_PIT:
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (kvm->arch.vpit)
+			goto create_pit_unlock;
 		r = -ENOMEM;
 		kvm->arch.vpit = kvm_create_pit(kvm);
 		if (kvm->arch.vpit)
 			r = 0;
+	create_pit_unlock:
+		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
-- 
cgit v1.2.3


From f5d0906b5bafd7faea553ed1cc92bd06755b66b9 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Sun, 4 Jan 2009 13:51:09 -0600
Subject: KVM: ppc: remove debug support broken by KVM debug rewrite

After the rewrite of KVM's debug support, this code doesn't even build any
more.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h |  5 ---
 arch/powerpc/include/asm/kvm_ppc.h  |  3 --
 arch/powerpc/kvm/44x.c              | 66 -------------------------------------
 arch/powerpc/kvm/powerpc.c          | 27 ++-------------
 4 files changed, 2 insertions(+), 99 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1c618769752..dfdf13c9fef 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -111,11 +111,6 @@ struct kvm_arch {
 struct kvm_vcpu_arch {
 	u32 host_stack;
 	u32 host_pid;
-	u32 host_dbcr0;
-	u32 host_dbcr1;
-	u32 host_dbcr2;
-	u32 host_iac[4];
-	u32 host_msr;
 
 	u64 fpr[32];
 	ulong gpr[32];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 6052779dbb5..2c6ee349df5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -77,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
-
 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 8383603dd8a..0cef809cec2 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,72 +28,6 @@
 
 #include "44x_tlb.h"
 
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvm44x_disable_debug_interrupts(void)
-{
-	mtmsr(mfmsr() & ~MSR_DE);
-}
-
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-	kvm44x_disable_debug_interrupts();
-
-	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
-	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
-	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
-	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
-	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
-	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
-	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
-	mtmsr(vcpu->arch.host_msr);
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-	u32 dbcr0 = 0;
-
-	vcpu->arch.host_msr = mfmsr();
-	kvm44x_disable_debug_interrupts();
-
-	/* Save host debug register state. */
-	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
-	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
-	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
-	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
-	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
-	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
-	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
-	/* set registers up for guest */
-
-	if (dbg->bp[0]) {
-		mtspr(SPRN_IAC1, dbg->bp[0]);
-		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
-	}
-	if (dbg->bp[1]) {
-		mtspr(SPRN_IAC2, dbg->bp[1]);
-		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
-	}
-	if (dbg->bp[2]) {
-		mtspr(SPRN_IAC3, dbg->bp[2]);
-		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
-	}
-	if (dbg->bp[3]) {
-		mtspr(SPRN_IAC4, dbg->bp[3]);
-		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
-	}
-
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBCR1, 0);
-	mtspr(SPRN_DBCR2, 0);
-}
-
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_44x_tlb_load(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f30be9ef231..9057335fdc6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -221,41 +221,18 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	if (vcpu->guest_debug.enabled)
-		kvmppc_core_load_guest_debugstate(vcpu);
-
 	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->guest_debug.enabled)
-		kvmppc_core_load_host_debugstate(vcpu);
-
-	/* Don't leave guest TLB entries resident when being de-scheduled. */
-	/* XXX It would be nice to differentiate between heavyweight exit and
-	 * sched_out here, since we could avoid the TLB flush for heavyweight
-	 * exits. */
-	_tlbil_all();
 	kvmppc_core_vcpu_put(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
+                                        struct kvm_guest_debug *dbg)
 {
-	int i;
-
-	vcpu->guest_debug.enabled = dbg->enabled;
-	if (vcpu->guest_debug.enabled) {
-		for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
-			if (dbg->breakpoints[i].enabled)
-				vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
-			else
-				vcpu->guest_debug.bp[i] = 0;
-		}
-	}
-
-	return 0;
+	return -EINVAL;
 }
 
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
-- 
cgit v1.2.3


From c8a73f186bf62235b6fb5dd52601d641917dd50b Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 5 Jan 2009 16:02:47 +0100
Subject: KVM: SVM: Add microcode patch level dummy

VMware ESX checks if the microcode level is correct when using a barcelona
CPU, in order to see if it actually can use SVM. Let's tell it we're on the
safe side...

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3a7a314d55..3a60c3f04e6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1928,6 +1928,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_VM_CR:
 		*data = 0;
 		break;
+	case MSR_IA32_UCODE_REV:
+		*data = 0x01000065;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
 	}
-- 
cgit v1.2.3


From 4677a3b693e035f186e2875259b9a0bb94c42fbe Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 6 Jan 2009 13:00:27 +0200
Subject: KVM: MMU: Optimize page unshadowing

Using kvm_mmu_lookup_page() will result in multiple scans of the hash chains;
use hlist_for_each_entry_safe() to achieve a single scan instead.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 15850809b55..aac0499947d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1471,11 +1471,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
+	unsigned index;
+	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
+	struct hlist_node *node, *nn;
 
-	while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
-		pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
-		kvm_mmu_zap_page(kvm, sp);
+	index = kvm_page_table_hashfn(gfn);
+	bucket = &kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+		if (sp->gfn == gfn && !sp->role.metaphysical
+		    && !sp->role.invalid) {
+			pgprintk("%s: zap %lx %x\n",
+				 __func__, gfn, sp->role.word);
+			kvm_mmu_zap_page(kvm, sp);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 5d9b8e30f543a9f21a968a4cda71e8f6d1c66a61 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 18:04:18 +0200
Subject: KVM: Add CONFIG_HAVE_KVM_IRQCHIP

Two KVM archs support irqchips and two don't.  Add a Kconfig item to
make selecting between the two models easier.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/Kconfig    | 4 ++++
 arch/powerpc/kvm/Kconfig | 3 +++
 arch/s390/kvm/Kconfig    | 3 +++
 arch/x86/kvm/Kconfig     | 4 ++++
 4 files changed, 14 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index f833a0b4188..0a2d6b86075 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -4,6 +4,10 @@
 config HAVE_KVM
 	bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+       default y
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	depends on HAVE_KVM || IA64
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 14657055014..5a152a52796 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -2,6 +2,9 @@
 # KVM configuration
 #
 
+config HAVE_KVM_IRQCHIP
+       bool
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	---help---
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e..3e260b7e37b 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -4,6 +4,9 @@
 config HAVE_KVM
        bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	default y
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bde..0a303c3ed11 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
 config HAVE_KVM
        bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+       default y
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	depends on HAVE_KVM || X86
-- 
cgit v1.2.3


From 4780c65904f0fc4e312ee2da9383eacbe04e61ea Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 4 Jan 2009 18:06:06 +0200
Subject: KVM: Reset PIT irq injection logic when the PIT IRQ is unmasked

While the PIT is masked the guest cannot ack the irq, so the reinject logic
will never allow the interrupt to be injected.

Fix by resetting the reinjection counters on unmask.

Unbreaks Xen.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8254.c | 15 +++++++++++++++
 arch/x86/kvm/i8254.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 69d1bbff3fd..c13bb92d315 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -539,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	pit->pit_state.irq_ack = 1;
 }
 
+static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+	if (!mask) {
+		atomic_set(&pit->pit_state.pit_timer.pending, 0);
+		pit->pit_state.irq_ack = 1;
+	}
+}
+
 struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 {
 	struct kvm_pit *pit;
@@ -586,6 +596,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 
 	kvm_pit_reset(pit);
 
+	pit->mask_notifier.func = pit_mask_notifer;
+	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
 	return pit;
 }
 
@@ -594,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
 	struct hrtimer *timer;
 
 	if (kvm->arch.vpit) {
+		kvm_unregister_irq_mask_notifier(kvm, 0,
+					       &kvm->arch.vpit->mask_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 76959c4b500..6acbe4b505d 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -46,6 +46,7 @@ struct kvm_pit {
 	struct kvm *kvm;
 	struct kvm_kpit_state pit_state;
 	int irq_source_id;
+	struct kvm_irq_mask_notifier mask_notifier;
 };
 
 #define KVM_PIT_BASE_ADDRESS	    0x40
-- 
cgit v1.2.3


From a26b73ad5e763db1b01cca6965ac3b65fe36e82a Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 8 Jan 2009 13:58:48 +0100
Subject: KVM: ia64: expose registers in struct kvm_regs

Provide register layout for struct kvm_regs exposed to userland.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index b5145784233..0ee5bd7a988 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,7 +166,40 @@ struct saved_vpd {
 	unsigned long  vcpuid[5];
 	unsigned long  vpsr;
 	unsigned long  vpr;
-	unsigned long  vcr[128];
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
 };
 
 struct kvm_regs {
-- 
cgit v1.2.3


From ff81ff10b4417952919dcc0bd67effa9ceb411c0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 8 Jan 2009 11:05:17 -0800
Subject: KVM: SVM: Fix typo in has_svm()

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3a60c3f04e6..db5021b2b5a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -250,7 +250,7 @@ static int has_svm(void)
 	const char *msg;
 
 	if (!cpu_has_svm(&msg)) {
-		printk(KERN_INFO "has_svn: %s\n", msg);
+		printk(KERN_INFO "has_svm: %s\n", msg);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 9903a927a4aea4b1ea42356a8453fca664df0b18 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 8 Jan 2009 17:44:19 -0200
Subject: KVM: MMU: drop zeroing on mmu_memory_cache_alloc

Zeroing on mmu_memory_cache_alloc is unnecessary since:

- Smaller areas are pre-allocated with kmem_cache_zalloc.
- Page pointed by ->spt is overwritten with prefetch_page
  and entries in page pointed by ->gfns are initialized
  before reading.

[avi: zeroing pages is unnecessary]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aac0499947d..de9a9fbc16e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -352,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
 
 	BUG_ON(!mc->nobjs);
 	p = mc->objects[--mc->nobjs];
-	memset(p, 0, size);
 	return p;
 }
 
-- 
cgit v1.2.3


From f6e2c02b6d28ddabe99377c5640a833407a62632 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 11 Jan 2009 13:02:10 +0200
Subject: KVM: MMU: Rename "metaphysical" attribute to "direct"

This actually describes what is going on, rather than alerting the reader
that something strange is going on.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++--
 arch/x86/kvm/mmu.c              | 32 ++++++++++++++++----------------
 arch/x86/kvm/paging_tmpl.h      | 12 ++++++------
 3 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 863ea73431a..55fd4c5fd38 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -170,7 +170,8 @@ struct kvm_pte_chain {
  *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
  *   bits 4:7 - page table level for this shadow (1-4)
  *   bits 8:9 - page table quadrant for 2-level guests
- *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ *   bit   16 - direct mapping of virtual to physical mapping at gfn
+ *              used for real mode and two-dimensional paging
  *   bits 17:19 - common access permissions for all ptes in this shadow page
  */
 union kvm_mmu_page_role {
@@ -180,7 +181,7 @@ union kvm_mmu_page_role {
 		unsigned level:4;
 		unsigned quadrant:2;
 		unsigned pad_for_nice_hex_output:6;
-		unsigned metaphysical:1;
+		unsigned direct:1;
 		unsigned access:3;
 		unsigned invalid:1;
 		unsigned cr4_pge:1;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index de9a9fbc16e..ef060ec444a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1066,7 +1066,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.metaphysical
+		if (sp->gfn == gfn && !sp->role.direct
 		    && !sp->role.invalid) {
 			pgprintk("%s: found role %x\n",
 				 __func__, sp->role.word);
@@ -1200,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
 					     unsigned level,
-					     int metaphysical,
+					     int direct,
 					     unsigned access,
 					     u64 *parent_pte)
 {
@@ -1213,7 +1213,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 	role = vcpu->arch.mmu.base_role;
 	role.level = level;
-	role.metaphysical = metaphysical;
+	role.direct = direct;
 	role.access = access;
 	if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1250,7 +1250,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp->role = role;
 	sp->global = role.cr4_pge;
 	hlist_add_head(&sp->hash_link, bucket);
-	if (!metaphysical) {
+	if (!direct) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
 			kvm_flush_remote_tlbs(vcpu->kvm);
 		account_shadowed(vcpu->kvm, gfn);
@@ -1395,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 	kvm_flush_remote_tlbs(kvm);
-	if (!sp->role.invalid && !sp->role.metaphysical)
+	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm, sp);
@@ -1458,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.metaphysical) {
+		if (sp->gfn == gfn && !sp->role.direct) {
 			pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
 				 sp->role.word);
 			r = 1;
@@ -1478,7 +1478,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
-		if (sp->gfn == gfn && !sp->role.metaphysical
+		if (sp->gfn == gfn && !sp->role.direct
 		    && !sp->role.invalid) {
 			pgprintk("%s: zap %lx %x\n",
 				 __func__, gfn, sp->role.word);
@@ -1638,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	/* don't unsync if pagetable is shadowed with multiple roles */
 	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
-		if (s->gfn != sp->gfn || s->role.metaphysical)
+		if (s->gfn != sp->gfn || s->role.direct)
 			continue;
 		if (s->role.word != sp->role.word)
 			return 1;
@@ -1951,7 +1951,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	int i;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *sp;
-	int metaphysical = 0;
+	int direct = 0;
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
@@ -1960,18 +1960,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 		ASSERT(!VALID_PAGE(root));
 		if (tdp_enabled)
-			metaphysical = 1;
+			direct = 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				      PT64_ROOT_LEVEL, metaphysical,
+				      PT64_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
-	metaphysical = !is_paging(vcpu);
+	direct = !is_paging(vcpu);
 	if (tdp_enabled)
-		metaphysical = 1;
+		direct = 1;
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1985,7 +1985,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-				      PT32_ROOT_LEVEL, metaphysical,
+				      PT32_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
@@ -2487,7 +2487,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
-		if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+		if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
 			continue;
 		pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3125,7 +3125,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 	gfn_t gfn;
 
 	list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
-		if (sp->role.metaphysical)
+		if (sp->role.direct)
 			continue;
 
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 46b68f941f6..7314c0944c5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -277,7 +277,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *shadow_page;
 	u64 spte, *sptep;
-	int metaphysical;
+	int direct;
 	gfn_t table_gfn;
 	int r;
 	int level;
@@ -313,17 +313,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 		if (level == PT_DIRECTORY_LEVEL
 		    && gw->level == PT_DIRECTORY_LEVEL) {
-			metaphysical = 1;
+			direct = 1;
 			if (!is_dirty_pte(gw->ptes[level - 1]))
 				access &= ~ACC_WRITE_MASK;
 			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
 		} else {
-			metaphysical = 0;
+			direct = 0;
 			table_gfn = gw->table_gfn[level - 2];
 		}
 		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-					       metaphysical, access, sptep);
-		if (!metaphysical) {
+					       direct, access, sptep);
+		if (!direct) {
 			r = kvm_read_guest_atomic(vcpu->kvm,
 						  gw->pte_gpa[level - 2],
 						  &curr_pte, sizeof(curr_pte));
@@ -512,7 +512,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 	pt_element_t pt[256 / sizeof(pt_element_t)];
 	gpa_t pte_gpa;
 
-	if (sp->role.metaphysical
+	if (sp->role.direct
 	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
 		nonpaging_prefetch_page(vcpu, sp);
 		return;
-- 
cgit v1.2.3


From 5a41accd3f69c6ef1d58f0c148980bae70515937 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 11 Jan 2009 17:19:35 +0200
Subject: KVM: MMU: Only enable cr4_pge role in shadow mode

Two dimensional paging is only confused by it.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6fbc3460337..19ccde621cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -364,7 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
-	vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE);
+	vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
 	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
-- 
cgit v1.2.3


From 87c656b4147cd08c6efba95d8d70c12cba181255 Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Wed, 14 Jan 2009 10:47:36 -0600
Subject: powerpc/fsl-booke: declare tlbcam_index for use in c

So, KVM needs to read tlbcam_index to know exactly
which TLB1 entry is unused by host.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/mmu-fsl-booke.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h
index 3f941c0f7e8..4285b64a65e 100644
--- a/arch/powerpc/include/asm/mmu-fsl-booke.h
+++ b/arch/powerpc/include/asm/mmu-fsl-booke.h
@@ -75,6 +75,8 @@
 
 #ifndef __ASSEMBLY__
 
+extern unsigned int tlbcam_index;
+
 typedef struct {
 	unsigned int	id;
 	unsigned int	active;
-- 
cgit v1.2.3


From fb2838d446f6ee7e13e4149e08ec138753c5c450 Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Wed, 14 Jan 2009 10:47:37 -0600
Subject: KVM: ppc: Fix e500 warnings and some spelling problems

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_emulate.c | 2 --
 arch/powerpc/kvm/e500_tlb.c     | 6 +++---
 arch/powerpc/kvm/e500_tlb.h     | 2 +-
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index d3c0c7c7f20..7a98d4a4c1e 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -30,8 +30,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int emulated = EMULATE_DONE;
 	int ra;
 	int rb;
-	int rs;
-	int rt;
 
 	switch (get_op(inst)) {
 	case 31:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6a50340f575..e3daf57b5f5 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -260,7 +260,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	unsigned int victim, pidsel, tsized;
 	int tlbsel;
 
-	/* since we only have tow TLBs, only lower bit is used. */
+	/* since we only have two TLBs, only lower bit is used. */
 	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
 	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
 	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
@@ -402,7 +402,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
 
 	ia = (ea >> 2) & 0x1;
 
-	/* since we only have tow TLBs, only lower bit is used. */
+	/* since we only have two TLBs, only lower bit is used. */
 	tlbsel = (ea >> 3) & 0x1;
 
 	if (ia) {
@@ -471,7 +471,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 	} else {
 		int victim;
 
-		/* since we only have tow TLBs, only lower bit is used. */
+		/* since we only have two TLBs, only lower bit is used. */
 		tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
 		victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
 
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index d8833f95516..ab49e935518 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -126,7 +126,7 @@ static inline unsigned int get_tlb_tlbsel(
 {
 	/*
 	 * Manual says that tlbsel has 2 bits wide.
-	 * Since we only have tow TLBs, only lower bit is used.
+	 * Since we only have two TLBs, only lower bit is used.
 	 */
 	return (vcpu_e500->mas0 >> 28) & 0x1;
 }
-- 
cgit v1.2.3


From 9aa4dd5e5f47a5feb96fc394fccf0265ab7d85a4 Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Wed, 14 Jan 2009 10:47:38 -0600
Subject: KVM: ppc: Move to new TLB invalidate interface

Commit 2a4aca1144394653269720ffbb5a325a77abd5fa removed old method _tlbia().

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index e3daf57b5f5..d437160d388 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -20,6 +20,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_e500.h>
 
+#include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
 
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@@ -158,7 +159,7 @@ void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
 {
-	_tlbia();
+	_tlbil_all();
 }
 
 /* Search the guest TLB for a matching entry. */
@@ -362,11 +363,10 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 		int i;
 
 		/* XXX Replace loop with fancy data structures. */
-		/* needn't set modified since tlbia will make TLB1 coherent */
 		for (i = 0; i < tlb1_max_shadow_size(); i++)
 			kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
 
-		_tlbia();
+		_tlbil_all();
 	}
 }
 
@@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
 			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
 	}
 
-	_tlbia();
+	_tlbil_all();
 
 	return EMULATE_DONE;
 }
@@ -604,7 +604,7 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 			kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
 
 	/* discard all guest mapping */
-	_tlbia();
+	_tlbil_all();
 }
 
 void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
-- 
cgit v1.2.3


From 193554750441d91e127dd5066b8aebe0f769101c Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Wed, 14 Jan 2009 16:56:00 +0000
Subject: KVM: x86: Fix typos and whitespace errors

Some typos, comments, whitespace errors corrected in the cpuid code

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19ccde621cd..141a0166e51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1067,7 +1067,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-			cpuid_arg->entries);
+						      cpuid_arg->entries);
 		if (r)
 			goto out;
 
@@ -1165,8 +1165,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
-				    struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+				     struct kvm_cpuid2 *cpuid,
+				     struct kvm_cpuid_entry2 __user *entries)
 {
 	int r;
 
@@ -1185,8 +1185,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
-				    struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+				     struct kvm_cpuid2 *cpuid,
+				     struct kvm_cpuid_entry2 __user *entries)
 {
 	int r;
 
@@ -1195,7 +1195,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 		goto out;
 	r = -EFAULT;
 	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
-			   vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
 	return 0;
 
@@ -1205,12 +1205,12 @@ out:
 }
 
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			  u32 index)
+			   u32 index)
 {
 	entry->function = function;
 	entry->index = index;
 	cpuid_count(entry->function, entry->index,
-		&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
 	entry->flags = 0;
 }
 
@@ -1249,7 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
 		bit(X86_FEATURE_SVM);
 
-	/* all func 2 cpuid_count() should be called on the same cpu */
+	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 	do_cpuid_1_ent(entry, function, index);
 	++*nent;
@@ -1323,7 +1323,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 }
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+				     struct kvm_cpuid_entry2 __user *entries)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG;
@@ -1340,7 +1340,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	limit = cpuid_entries[0].eax;
 	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-				&nent, cpuid->nent);
+			     &nent, cpuid->nent);
 	r = -E2BIG;
 	if (nent >= cpuid->nent)
 		goto out_free;
@@ -1349,10 +1349,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	limit = cpuid_entries[nent - 1].eax;
 	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-			       &nent, cpuid->nent);
+			     &nent, cpuid->nent);
 	r = -EFAULT;
 	if (copy_to_user(entries, cpuid_entries,
-			nent * sizeof(struct kvm_cpuid_entry2)))
+			 nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out_free;
 	cpuid->nent = nent;
 	r = 0;
@@ -1496,7 +1496,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
-				cpuid_arg->entries);
+					      cpuid_arg->entries);
 		if (r)
 			goto out;
 		break;
@@ -1509,7 +1509,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
-				cpuid_arg->entries);
+					      cpuid_arg->entries);
 		if (r)
 			goto out;
 		r = -EFAULT;
@@ -2864,7 +2864,7 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
 	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
 		return 0;
 	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
-		!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
 		return 0;
 	return 1;
 }
@@ -2892,7 +2892,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 			if (!best || e->function > best->function)
 				best = e;
 	}
-
 	return best;
 }
 
-- 
cgit v1.2.3


From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 19 Nov 2008 13:58:46 +0200
Subject: KVM: Userspace controlled irq routing

Currently KVM has a static routing from GSI numbers to interrupts (namely,
0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to
the IOAPIC).  This is insufficient for several reasons:

- HPET requires non 1:1 mapping for the timer interrupt
- MSIs need a new method to assign interrupt numbers and dispatch them
- ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the
  ioapics

This patch implements an interrupt routing table (as a linked list, but this
can be easily changed) and a userspace interface to replace the table.  The
routing table is initialized according to the current hardwired mapping.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c | 5 +++++
 arch/x86/kvm/x86.c       | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 1477f91617a..dbf527a5734 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -919,6 +919,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_ioapic_init(kvm);
 		if (r)
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 141a0166e51..32e3a7ec6ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			}
 		} else
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vpic);
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
 	case KVM_CREATE_PIT:
 		mutex_lock(&kvm->lock);
-- 
cgit v1.2.3


From 91b2ae773d3b168b763237fac33f75b13d891f20 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 19 Jan 2009 14:57:52 +0200
Subject: KVM: Avoid using CONFIG_ in userspace visible headers

Kconfig symbols are not available in userspace, and are not stripped by
headers-install.  Avoid their use by adding #defines in <asm/kvm.h> to
suit each architecture.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 54bcf228152..dc3f6cf1170 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
 #define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
-- 
cgit v1.2.3


From 27d146449ccaad16480888129bb32a000ee33717 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 15 Jan 2009 17:58:19 +0800
Subject: KVM: ia64: vTLB change for enabling windows 2008 boot

Simply the logic of hash vTLB, and export kvm_gpa_to_mpa.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/vcpu.h |  4 ++--
 arch/ia64/kvm/vtlb.c | 39 +++++++++++++++++----------------------
 2 files changed, 19 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b2f12a562bd..042af92ced8 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -703,7 +703,7 @@ extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
 extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
 extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
 extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
 		u64 itir, u64 ifa, int type);
 extern void thash_purge_all(struct kvm_vcpu *v);
 extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
@@ -738,7 +738,7 @@ void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
 void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-
+u64 kvm_gpa_to_mpa(u64 gpa);
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
 
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index ac94867f826..38232b37668 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -164,11 +164,11 @@ static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
 	unsigned long ps, gpaddr;
 
 	ps = itir_ps(itir);
+	rr.val = ia64_get_rr(ifa);
 
-	gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-		(ifa & ((1UL << ps) - 1));
+	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+					(ifa & ((1UL << ps) - 1));
 
-	rr.val = ia64_get_rr(ifa);
 	head = (struct thash_data *)ia64_thash(ifa);
 	head->etag = INVALID_TI_TAG;
 	ia64_mf();
@@ -412,16 +412,14 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
 
 /*
  * Purge overlap TCs and then insert the new entry to emulate itc ops.
- *    Notes: Only TC entry can purge and insert.
- *    1 indicates this is MMIO
+ * Notes: Only TC entry can purge and insert.
  */
-int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 						u64 ifa, int type)
 {
 	u64 ps;
 	u64 phy_pte, io_mask, index;
 	union ia64_rr vrr, mrr;
-	int ret = 0;
 
 	ps = itir_ps(itir);
 	vrr.val = vcpu_get_rr(v, ifa);
@@ -441,25 +439,19 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 		phy_pte &= ~_PAGE_MA_MASK;
 	}
 
-	if (pte & VTLB_PTE_IO)
-		ret = 1;
-
 	vtlb_purge(v, ifa, ps);
 	vhpt_purge(v, ifa, ps);
 
-	if (ps == mrr.ps) {
-		if (!(pte&VTLB_PTE_IO)) {
-			vhpt_insert(phy_pte, itir, ifa, pte);
-		} else {
-			vtlb_insert(v, pte, itir, ifa);
-			vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-		}
-	} else if (ps > mrr.ps) {
+	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
 		vtlb_insert(v, pte, itir, ifa);
 		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-		if (!(pte&VTLB_PTE_IO))
-			vhpt_insert(phy_pte, itir, ifa, pte);
-	} else {
+	}
+	if (pte & VTLB_PTE_IO)
+		return;
+
+	if (ps >= mrr.ps)
+		vhpt_insert(phy_pte, itir, ifa, pte);
+	else {
 		u64 psr;
 		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
 		psr = ia64_clear_ic();
@@ -469,7 +461,6 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 	if (!(pte&VTLB_PTE_IO))
 		mark_pages_dirty(v, pte, ps);
 
-	return ret;
 }
 
 /*
@@ -570,6 +561,10 @@ void thash_init(struct thash_cb *hcb, u64 sz)
 u64 kvm_get_mpt_entry(u64 gpfn)
 {
 	u64 *base = (u64 *) KVM_P2M_BASE;
+
+	if (gpfn >= (KVM_P2M_SIZE >> 3))
+		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
 	return *(base + gpfn);
 }
 
-- 
cgit v1.2.3


From 4b7bb626e3133eab131328a0770864b322c1bfe6 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 15 Jan 2009 18:08:36 +0800
Subject: KVM: ia64: Add the support for translating PAL Call's pointer args

Add the support to translate PAL Call's pointer args.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/process.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index f9c9504144f..0e727ce0d55 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -550,18 +550,60 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 	inject_guest_interruption(vcpu, vector);
 }
 
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+						unsigned long arg)
+{
+	struct thash_data *data;
+	unsigned long gpa, poff;
+
+	if (!is_physical_mode(vcpu)) {
+		/* Depends on caller to provide the DTR or DTC mapping.*/
+		data = vtlb_lookup(vcpu, arg, D_TLB);
+		if (data)
+			gpa = data->page_flags & _PAGE_PPN_MASK;
+		else {
+			data = vhpt_lookup(arg);
+			if (!data)
+				return 0;
+			gpa = data->gpaddr & _PAGE_PPN_MASK;
+		}
+
+		poff = arg & (PSIZE(data->ps) - 1);
+		arg = PAGEALIGN(gpa, data->ps) | poff;
+	}
+	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+	return (unsigned long)__va(arg);
+}
+
 static void set_pal_call_data(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
 
 	/*FIXME:For static and stacked convention, firmware
 	 * has put the parameters in gr28-gr31 before
 	 * break to vmm  !!*/
 
-	p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
-	p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
-	p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	switch (gr28) {
+	case PAL_PERF_MON_INFO:
+	case PAL_HALT_INFO:
+		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+		break;
+	case PAL_BRAND_INFO:
+		p->u.pal_data.gr29 = gr29;;
+		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+		break;
+	default:
+		p->u.pal_data.gr29 = gr29;;
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	}
+	p->u.pal_data.gr28 = gr28;
 	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
 	p->exit_reason = EXIT_REASON_PAL_CALL;
 }
 
-- 
cgit v1.2.3


From 7d656bd996b68737d5d07643bc57311059038d67 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Wed, 21 Jan 2009 11:21:27 +0800
Subject: KVM: ia64: Implement some pal calls needed for windows 2008

For windows 2008, it needs more pal calls to implement for booting.
In addition, also changes the name of set_{sal, pal}_call_result to
get_{sal,pal}_call_result for readability.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm_fw.c  | 151 +++++++++++++++++++++++++++++++++++++++++++++++-
 arch/ia64/kvm/process.c |   8 +--
 2 files changed, 152 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index cb7600bdff9..a8ae52ed563 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -227,6 +227,18 @@ static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
 	return result;
 }
 
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+	return result;
+}
+
 static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
 {
 
@@ -268,8 +280,12 @@ static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
 static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
 {
 	struct ia64_pal_retval result;
+	unsigned long in0, in1, in2, in3;
 
-	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	result.status = ia64_pal_vm_info(in1, in2,
+			(pal_tc_info_u_t *)&result.v1, &result.v2);
 
 	return result;
 }
@@ -292,6 +308,108 @@ static void prepare_for_halt(struct kvm_vcpu *vcpu)
 	vcpu->arch.timer_fired = 0;
 }
 
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+	long status;
+	unsigned long in0, in1, in2, in3, r9;
+	unsigned long pm_buffer[16];
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_perf_mon_info(pm_buffer,
+				(pal_perf_mon_info_u_t *) &r9);
+	if (status != 0) {
+		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+	} else {
+		if (in1)
+			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+		else {
+			status = PAL_STATUS_EINVAL;
+			printk(KERN_WARNING"Invalid parameters "
+						"for PAL call:0x%lx!\n", in0);
+		}
+	}
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+	unsigned long in0, in1, in2, in3;
+	long status;
+	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+					| (1UL << 61) | (1UL << 60);
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	if (in1) {
+		memcpy((void *)in1, &res, sizeof(res));
+		status = 0;
+	} else{
+		status = PAL_STATUS_EINVAL;
+		printk(KERN_WARNING"Invalid parameters "
+					"for PAL call:0x%lx!\n", in0);
+	}
+
+	return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+	unsigned long r9;
+	long status;
+
+	status = ia64_pal_mem_attrib(&r9);
+
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static void remote_pal_prefetch_visibility(void *v)
+{
+	s64 trans_type = (s64)v;
+	ia64_pal_prefetch_visibility(trans_type);
+}
+
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_prefetch_visibility(in1);
+	if (result.status == 0) {
+		/* Must be performed on all remote processors
+		in the coherence domain. */
+		smp_call_function(remote_pal_prefetch_visibility,
+					(void *)in1, 1);
+		/* Unnecessary on remote processor for other vcpus!*/
+		result.status = 1;
+	}
+	return result;
+}
+
+static void remote_pal_mc_drain(void *v)
+{
+	ia64_pal_mc_drain();
+}
+
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	if (in1 == 0 && in2) {
+		char brand_info[128];
+		result.status = ia64_pal_get_brand_info(brand_info);
+		if (result.status == PAL_STATUS_SUCCESS)
+			memcpy((void *)in2, brand_info, 128);
+	} else {
+		result.status = PAL_STATUS_REQUIRES_MEMORY;
+		printk(KERN_WARNING"Invalid parameters for "
+					"PAL call:0x%lx!\n", in0);
+	}
+
+	return result;
+}
+
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 
@@ -300,14 +418,22 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	int ret = 1;
 
 	gr28 = kvm_get_pal_call_index(vcpu);
-	/*printk("pal_call index:%lx\n",gr28);*/
 	switch (gr28) {
 	case PAL_CACHE_FLUSH:
 		result = pal_cache_flush(vcpu);
 		break;
+	case PAL_MEM_ATTRIB:
+		result = pal_mem_attrib(vcpu);
+		break;
 	case PAL_CACHE_SUMMARY:
 		result = pal_cache_summary(vcpu);
 		break;
+	case PAL_PERF_MON_INFO:
+		result = pal_perf_mon_info(vcpu);
+		break;
+	case PAL_HALT_INFO:
+		result = pal_halt_info(vcpu);
+		break;
 	case PAL_HALT_LIGHT:
 	{
 		INIT_PAL_STATUS_SUCCESS(result);
@@ -317,6 +443,16 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	}
 		break;
 
+	case PAL_PREFETCH_VISIBILITY:
+		result = pal_prefetch_visibility(vcpu);
+		break;
+	case PAL_MC_DRAIN:
+		result.status = ia64_pal_mc_drain();
+		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
+		   That causes the congestion. */
+		smp_call_function(remote_pal_mc_drain, NULL, 1);
+		break;
+
 	case PAL_FREQ_RATIOS:
 		result = pal_freq_ratios(vcpu);
 		break;
@@ -346,6 +482,9 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		INIT_PAL_STATUS_SUCCESS(result);
 		result.v1 = (1L << 32) | 1L;
 		break;
+	case PAL_REGISTER_INFO:
+		result = pal_register_info(vcpu);
+		break;
 	case PAL_VM_PAGE_SIZE:
 		result.status = ia64_pal_vm_page_size(&result.v0,
 							&result.v1);
@@ -365,12 +504,18 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		result.status = ia64_pal_version(
 				(pal_version_u_t *)&result.v0,
 				(pal_version_u_t *)&result.v1);
-
 		break;
 	case PAL_FIXED_ADDR:
 		result.status = PAL_STATUS_SUCCESS;
 		result.v0 = vcpu->vcpu_id;
 		break;
+	case PAL_BRAND_INFO:
+		result = pal_get_brand_info(vcpu);
+		break;
+	case PAL_GET_PSTATE:
+	case PAL_CACHE_SHARED_INFO:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		break;
 	default:
 		INIT_PAL_STATUS_UNIMPLEMENTED(result);
 		printk(KERN_WARNING"kvm: Unsupported pal call,"
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 0e727ce0d55..b1dc80952d9 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -607,7 +607,7 @@ static void set_pal_call_data(struct kvm_vcpu *vcpu)
 	p->exit_reason = EXIT_REASON_PAL_CALL;
 }
 
-static void set_pal_call_result(struct kvm_vcpu *vcpu)
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -635,7 +635,7 @@ static void set_sal_call_data(struct kvm_vcpu *vcpu)
 	p->exit_reason = EXIT_REASON_SAL_CALL;
 }
 
-static void set_sal_call_result(struct kvm_vcpu *vcpu)
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -658,13 +658,13 @@ void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
 		if (iim == DOMN_PAL_REQUEST) {
 			set_pal_call_data(v);
 			vmm_transition(v);
-			set_pal_call_result(v);
+			get_pal_call_result(v);
 			vcpu_increment_iip(v);
 			return;
 		} else if (iim == DOMN_SAL_REQUEST) {
 			set_sal_call_data(v);
 			vmm_transition(v);
-			set_sal_call_result(v);
+			get_sal_call_result(v);
 			vcpu_increment_iip(v);
 			return;
 		}
-- 
cgit v1.2.3


From 2c411b48af3e3c534b9cfb6a79be1df384d1ca1a Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 22 Jan 2009 14:20:27 +0100
Subject: KVM: s390: Fix printk on SIGP set arch

KVM on s390 does not support the ESA/390 architecture. We refuse to
change the architecture mode and print a warning. This patch removes
the printk for several reasons:

o A malicious guest can flood host dmesg
o The old message had no newline
o there is no connection between the message and the failing guest

This patch simply removes the printk. We already set the condition
code to 3 - the guest knows that something went wrong.

Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/kvm/sigp.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 2a01b9e0280..f27dbedf086 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -153,8 +153,6 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 
 	switch (parameter & 0xff) {
 	case 0:
-		printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
-							" not supported");
 		rc = 3; /* not operational */
 		break;
 	case 1:
-- 
cgit v1.2.3


From 70455a36a073cbb83ca17f92d135a6128c73cb3c Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 22 Jan 2009 10:28:29 +0100
Subject: KVM: s390: Fix problem state check for b2 intercepts

The kernel handles some priviledged instruction exits. While I was
unable to trigger such an exit from guest userspace, the code should
check for supervisor state before emulating a priviledged instruction.

I also renamed kvm_s390_handle_priv to kvm_s390_handle_b2. After all
there are non priviledged b2 instructions like stck (store clock).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/kvm/intercept.c |  2 +-
 arch/s390/kvm/kvm-s390.h  |  2 +-
 arch/s390/kvm/priv.c      | 18 +++++++++++++++---
 3 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 61236102203..9d19803111b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -103,7 +103,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
 static intercept_handler_t instruction_handlers[256] = {
 	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
-	[0xb2] = kvm_s390_handle_priv,
+	[0xb2] = kvm_s390_handle_b2,
 	[0xb7] = handle_lctl,
 	[0xeb] = handle_lctlg,
 };
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3893cf12eac..00bbe69b78d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -50,7 +50,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 
 /* implemented in priv.c */
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3605df45dd4..4b88834b8dd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -304,12 +304,24 @@ static intercept_handler_t priv_handlers[256] = {
 	[0xb1] = handle_stfl,
 };
 
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 {
 	intercept_handler_t handler;
 
+	/*
+	 * a lot of B2 instructions are priviledged. We first check for
+	 * the priviledges ones, that we can handle in the kernel. If the
+	 * kernel can handle this instruction, we check for the problem
+	 * state bit and (a) handle the instruction or (b) send a code 2
+	 * program check.
+	 * Anything else goes to userspace.*/
 	handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-	if (handler)
-		return handler(vcpu);
+	if (handler) {
+		if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+			return kvm_s390_inject_program_int(vcpu,
+						   PGM_PRIVILEGED_OPERATION);
+		else
+			return handler(vcpu);
+	}
 	return -ENOTSUPP;
 }
-- 
cgit v1.2.3


From b7e6e4d3602c738b8f61225d9f4514945df52f07 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 22 Jan 2009 10:29:08 +0100
Subject: KVM: s390: Fix SIGP set prefix ioctl

This patch fixes the SET PREFIX interrupt if triggered by userspace.
Until now, it was not necessary, but life migration will need it. In
addition, it helped me creating SMP support for my kvm_crashme tool
(lets kvm execute random guest memory content).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/kvm/interrupt.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4fe28a2521..0189356fe20 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -555,9 +555,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
 			   s390int->parm);
 		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		inti->prefix.address = s390int->parm;
+		inti->type = s390int->type;
+		VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+			   s390int->parm);
+		break;
 	case KVM_S390_SIGP_STOP:
 	case KVM_S390_RESTART:
-	case KVM_S390_SIGP_SET_PREFIX:
 	case KVM_S390_INT_EMERGENCY:
 		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
 		inti->type = s390int->type;
-- 
cgit v1.2.3


From 934d534f8a5a39e20d5682b3a3a45ff351706b59 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 21 Jan 2009 15:16:43 +0100
Subject: KVM: ia64: dynamic nr online cpus

Account for number of online cpus and use that in loops iterating over
the list of vpus instead of scanning the full array unconditionally.
This patch is a building block to facilitate allowing to bump up
the size of MAX_VCPUS significantly.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by : Xiantao Zhang  <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h | 10 ++++++----
 arch/ia64/kvm/kvm-ia64.c         | 12 ++++++++----
 arch/ia64/kvm/vcpu.c             |  5 ++++-
 3 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 46f8b0eb568..4542651e6ac 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -157,10 +157,10 @@ struct kvm_vm_data {
 	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
 };
 
-#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE		KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct))
 #define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
 				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
 
@@ -464,6 +464,8 @@ struct kvm_arch {
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
 
+	int		online_vcpus;
+
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
 	struct kvm_sal_data rdv_sal_data;
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index dbf527a5734..9c77e3939e9 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -314,7 +314,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 	union ia64_lid lid;
 	int i;
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+	for (i = 0; i < kvm->arch.online_vcpus; i++) {
 		if (kvm->vcpus[i]) {
 			lid.val = VCPU_LID(kvm->vcpus[i]);
 			if (lid.id == id && lid.eid == eid)
@@ -388,7 +388,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	call_data.ptc_g_data = p->u.ptc_g_data;
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+	for (i = 0; i < kvm->arch.online_vcpus; i++) {
 		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
 						KVM_MP_STATE_UNINITIALIZED ||
 					vcpu == kvm->vcpus[i])
@@ -788,6 +788,8 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 	kvm_init_vm(kvm);
 
+	kvm->arch.online_vcpus = 0;
+
 	return kvm;
 
 }
@@ -1154,7 +1156,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		for (i = 0; i < kvm->arch.online_vcpus; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
@@ -1288,6 +1290,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		goto fail;
 	}
 
+	kvm->arch.online_vcpus++;
+
 	return vcpu;
 fail:
 	return ERR_PTR(r);
@@ -1828,7 +1832,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
 	int i;
 
-	for (i = 1; i < KVM_MAX_VCPUS; i++) {
+	for (i = 1; i < kvm->arch.online_vcpus; i++) {
 		if (!kvm->vcpus[i])
 			continue;
 		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index 4d8be4c252f..d4d28050587 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -807,12 +807,15 @@ static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
 static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 {
 	struct kvm_vcpu *v;
+	struct kvm *kvm;
 	int i;
 	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
 	unsigned long vitv = VCPU(vcpu, itv);
 
+	kvm = (struct kvm *)KVM_VM_BASE;
+
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		for (i = 0; i < kvm->arch.online_vcpus; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;
-- 
cgit v1.2.3


From 44882eed2ebe7f75f8cdae5671ab1d6e0fa40dbc Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 Jan 2009 15:12:38 -0200
Subject: KVM: make irq ack notifications aware of routing table

IRQ ack notifications assume an identity mapping between pin->gsi,
which might not be the case with, for example, HPET.

Translate before acking.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/i8259.c | 5 +++--
 arch/x86/kvm/irq.h   | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103f..93160375c84 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,8 @@ static void pic_unlock(struct kvm_pic *s)
 	spin_unlock(&s->lock);
 
 	while (acks) {
-		kvm_notify_acked_irq(kvm, __ffs(acks));
+		kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+				     __ffs(acks));
 		acks &= acks - 1;
 	}
 
@@ -232,7 +233,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	}
 	pic_update_irq(s);
 	pic_unlock(s);
-	kvm_notify_acked_irq(kvm, irq);
+	kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
 
 	return intno;
 }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d..9f593188129 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
 #include "lapic.h"
 
 #define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
 
 struct kvm;
 struct kvm_vcpu;
-- 
cgit v1.2.3


From d20626936dd6aa783760e780dae5abb127564316 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Feb 2009 16:23:50 +0100
Subject: x86: Add EFER descriptions for FFXSR

AMD k10 includes support for the FFXSR feature, which leaves out
XMM registers on FXSAVE/FXSAVE when the EFER_FFXSR bit is set in
EFER.

The CPUID feature bit exists already, but the EFER bit is missing
currently, so this patch adds it to the list of known EFER bits.

Signed-off-by: Alexander Graf <agraf@suse.de>
CC: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/msr-index.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 46e9646e7a6..f4e505f286b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -19,12 +19,14 @@
 #define _EFER_LMA		10 /* Long mode active (read-only) */
 #define _EFER_NX		11 /* No execute enable */
 #define _EFER_SVME		12 /* Enable virtualization */
+#define _EFER_FFXSR		14 /* Enable Fast FXSAVE/FXRSTOR */
 
 #define EFER_SCE		(1<<_EFER_SCE)
 #define EFER_LME		(1<<_EFER_LME)
 #define EFER_LMA		(1<<_EFER_LMA)
 #define EFER_NX			(1<<_EFER_NX)
 #define EFER_SVME		(1<<_EFER_SVME)
+#define EFER_FFXSR		(1<<_EFER_FFXSR)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
-- 
cgit v1.2.3


From 1b2fd70c4eddef53f32639296818c0253e7ca48d Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Feb 2009 16:23:51 +0100
Subject: KVM: Add FFXSR support

AMD K10 CPUs implement the FFXSR feature that gets enabled using
EFER. Let's check if the virtual CPU description includes that
CPUID feature bit and allow enabling it then.

This is required for Windows Server 2008 in Hyper-V mode.

v2 adds CPUID capability exposure

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c |  3 +++
 arch/x86/kvm/x86.c | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index db5021b2b5a..1c4a018bf4b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -417,6 +417,9 @@ static __init int svm_hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+		kvm_enable_efer_bits(EFER_FFXSR);
+
 	if (nested) {
 		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 		kvm_enable_efer_bits(EFER_SVME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32e3a7ec6ad..8f83590b47d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -490,6 +490,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		return;
 	}
 
+	if (efer & EFER_FFXSR) {
+		struct kvm_cpuid_entry2 *feat;
+
+		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+			printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+			kvm_inject_gp(vcpu, 0);
+			return;
+		}
+	}
+
 	if (efer & EFER_SVME) {
 		struct kvm_cpuid_entry2 *feat;
 
@@ -1240,6 +1251,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #ifdef CONFIG_X86_64
 		bit(X86_FEATURE_LM) |
 #endif
+		bit(X86_FEATURE_FXSR_OPT) |
 		bit(X86_FEATURE_MMXEXT) |
 		bit(X86_FEATURE_3DNOWEXT) |
 		bit(X86_FEATURE_3DNOW);
-- 
cgit v1.2.3


From 34c33d163fe509da8414a736c6328855f8c164e5 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Sun, 8 Feb 2009 13:28:15 +0100
Subject: KVM: Drop unused evaluations from string pio handlers

Looks like neither the direction nor the rep prefix are used anymore.
Drop related evaluations from SVM's and VMX's I/O exit handlers.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 4 +---
 arch/x86/kvm/vmx.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1c4a018bf4b..22e88a4a51c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1194,7 +1194,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, down, in, string, rep;
+	int size, in, string;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
@@ -1213,8 +1213,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
-	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
-	down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
 	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df454de8acf..509b3530540 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2698,7 +2698,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	unsigned long exit_qualification;
-	int size, down, in, string, rep;
+	int size, in, string;
 	unsigned port;
 
 	++vcpu->stat.io_exits;
@@ -2714,8 +2714,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	size = (exit_qualification & 7) + 1;
 	in = (exit_qualification & 8) != 0;
-	down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
 	skip_emulated_instruction(vcpu);
-- 
cgit v1.2.3


From 49cd7d2238e44f7ee4269481cd8a1261cc8f93a5 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 13:50:40 +0800
Subject: KVM: VMX: Use kvm_mmu_page_fault() handle EPT violation mmio

Removed duplicated code.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 509b3530540..cb27ffccf46 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3058,11 +3058,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u64 exit_qualification;
-	enum emulation_result er;
 	gpa_t gpa;
-	unsigned long hva;
 	int gla_validity;
-	int r;
 
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 
@@ -3085,32 +3082,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-	hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
-	if (!kvm_is_error_hva(hva)) {
-		r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
-		if (r < 0) {
-			printk(KERN_ERR "EPT: Not enough memory!\n");
-			return -ENOMEM;
-		}
-		return 1;
-	} else {
-		/* must be MMIO */
-		er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
-
-		if (er == EMULATE_FAIL) {
-			printk(KERN_ERR
-			 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
-			 er);
-			printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
-			 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-			 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
-			printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
-				(long unsigned int)exit_qualification);
-			return -ENOTSUPP;
-		} else if (er == EMULATE_DO_MMIO)
-			return 0;
-	}
-	return 1;
+	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-- 
cgit v1.2.3


From c807660407a695f390034e402edfe544a1d2e40c Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Wed, 4 Feb 2009 17:52:04 +0100
Subject: KVM: Fix kvmclock on !constant_tsc boxes

kvmclock currently falls apart on machines without constant tsc.
This patch fixes it.  Changes:

  * keep tsc frequency in a per-cpu variable.
  * handle kvmclock update using a new request flag, thus checking
    whenever we need an update each time we enter guest context.
  * use a cpufreq notifier to track frequency changes and force
    kvmclock updates.
  * send ipis to kick cpu out of guest context if needed to make
    sure the guest doesn't see stale values.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 94 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f83590b47d..05d7be89b5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
 #include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 		 hv_clock->tsc_to_system_mul);
 }
 
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
-	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
-		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = tsc_khz;
+	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	}
 
 	/* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+
+	if (!vcpu->time_page)
+		return 0;
+	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+	return 1;
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			vcpu->arch.time_page = NULL;
 		}
 
-		kvm_write_guest_time(vcpu);
+		kvm_request_guest_time_update(vcpu);
 		break;
 	}
 	default:
@@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
@@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
-	case KVM_CAP_CLOCKSOURCE:
-		r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
-		break;
 	default:
 		r = 0;
 		break;
@@ -1098,7 +1109,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	kvm_write_guest_time(vcpu);
+	kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+static void bounce_off(void *info)
+{
+	/* nothing */
+}
+
+static unsigned int  ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				     void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i, send_ipi = 0;
+
+	if (!ref_freq)
+		ref_freq = freq->old;
+
+	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+		return 0;
+	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+		return 0;
+	per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (vcpu->cpu != freq->cpu)
+				continue;
+			if (!kvm_request_guest_time_update(vcpu))
+				continue;
+			if (vcpu->cpu != smp_processor_id())
+				send_ipi++;
+		}
+	}
+	spin_unlock(&kvm_lock);
+
+	if (freq->old < freq->new && send_ipi) {
+		/*
+		 * We upscale the frequency.  Must make the guest
+		 * doesn't see old kvmclock values while running with
+		 * the new frequency, otherwise we risk the guest sees
+		 * time go backwards.
+		 *
+		 * In case we update the frequency for another cpu
+		 * (which might be in guest context) send an interrupt
+		 * to kick the cpu out of guest context.  Next time
+		 * guest context is entered kvmclock will be updated,
+		 * so the guest will not see stale values.
+		 */
+		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+	}
+	return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+        .notifier_call  = kvmclock_cpufreq_notifier
+};
+
 int kvm_arch_init(void *opaque)
 {
-	int r;
+	int r, cpu;
 	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
 	if (kvm_x86_ops) {
@@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+	for_each_possible_cpu(cpu)
+		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		tsc_khz_ref = tsc_khz;
+		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+					  CPUFREQ_TRANSITION_NOTIFIER);
+	}
+
 	return 0;
 
 out:
@@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+			kvm_write_guest_time(vcpu);
 		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
 			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-- 
cgit v1.2.3


From 6bed6b9e84bf5e6b468847a50c0751389fdd01d4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 18 Feb 2009 14:08:59 +0100
Subject: KVM: MMU: remove redundant check in mmu_set_spte

The following code flow is unnecessary:

	if (largepage)
		was_rmapped = is_large_pte(*shadow_pte);
	 else
	 	was_rmapped = 1;

The is_large_pte() function will always evaluate to one here because the
(largepage && !is_large_pte) case is already handled in the first
if-clause. So we can remove this check and set was_rmapped to one always
here.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec444a..c90b4b2f2ab 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1791,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			pgprintk("hfn old %lx new %lx\n",
 				 spte_to_pfn(*shadow_pte), pfn);
 			rmap_remove(vcpu->kvm, shadow_pte);
-		} else {
-			if (largepage)
-				was_rmapped = is_large_pte(*shadow_pte);
-			else
-				was_rmapped = 1;
-		}
+		} else
+			was_rmapped = 1;
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
 		      dirty, largepage, global, gfn, pfn, speculative, true)) {
-- 
cgit v1.2.3


From 452425dbaa1974e9fc489e64a8de46a47b4c2754 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 18 Feb 2009 14:54:37 +0100
Subject: KVM: MMU: remove assertion in kvm_mmu_alloc_page

The assertion no longer makes sense since we don't clear page tables on
allocation; instead we clear them during prefetch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c90b4b2f2ab..4a21b0f8491 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -802,7 +802,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&sp->oos_link);
-	ASSERT(is_empty_shadow_page(sp->spt));
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
 	sp->parent_pte = parent_pte;
-- 
cgit v1.2.3


From 4925663a079c77d95d8685228ad6675fc5639c8e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 4 Feb 2009 17:28:14 +0200
Subject: KVM: Report IRQ injection status to userspace.

IRQ injection status is either -1 (if there was no CPU found
that should except the interrupt because IRQ was masked or
ioapic was misconfigured or ...) or >= 0 in that case the
number indicates to how many CPUs interrupt was injected.
If the value is 0 it means that the interrupt was coalesced
and probably should be reinjected.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        | 12 ++++++++++--
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/i8259.c            | 18 +++++++++++++-----
 arch/x86/kvm/x86.c              | 13 +++++++++++--
 4 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 9c77e3939e9..076b00d1dbf 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
-
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -927,6 +927,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		}
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -934,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5fd38..f0faf58044f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -616,7 +616,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 93160375c84..b4e662e94dd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -77,12 +77,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
 /*
  * set irq level. If an edge is detected, then the IRR is set to 1
  */
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
 {
-	int mask;
+	int mask, ret = 1;
 	mask = 1 << irq;
 	if (s->elcr & mask)	/* level triggered */
 		if (level) {
+			ret = !(s->irr & mask);
 			s->irr |= mask;
 			s->last_irr |= mask;
 		} else {
@@ -91,11 +92,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
 		}
 	else	/* edge triggered */
 		if (level) {
-			if ((s->last_irr & mask) == 0)
+			if ((s->last_irr & mask) == 0) {
+				ret = !(s->irr & mask);
 				s->irr |= mask;
+			}
 			s->last_irr |= mask;
 		} else
 			s->last_irr &= ~mask;
+
+	return (s->imr & mask) ? -1 : ret;
 }
 
 /*
@@ -172,16 +177,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
 	pic_unlock(s);
 }
 
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
+	int ret = -1;
 
 	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
-		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 	}
 	pic_unlock(s);
+
+	return ret;
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05d7be89b5e..e4db5be7c95 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1019,6 +1019,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1877,6 +1878,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	create_pit_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -1884,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-				    irq_event.irq, irq_event.level);
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
-- 
cgit v1.2.3


From 1fbdc7a58512a6283e10fd27108197679db95ffa Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Sun, 11 Jan 2009 22:39:44 +0100
Subject: KVM: SVM: set accessed bit for VMCB segment selectors

In the segment descriptor _cache_ the accessed bit is always set
(although it can be cleared in the descriptor itself). Since Intel
checks for this condition on a VMENTRY, set this bit in the AMD path
to enable cross vendor migration.

Cc: stable@kernel.org
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Acked-By: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 22e88a4a51c..1821c207819 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -796,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
 
-	/*
-	 * SVM always stores 0 for the 'G' bit in the CS selector in
-	 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
-	 * Intel's VMENTRY has a check on the 'G' bit.
-	 */
-	if (seg == VCPU_SREG_CS)
+	switch (seg) {
+	case VCPU_SREG_CS:
+		/*
+		 * SVM always stores 0 for the 'G' bit in the CS selector in
+		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+		 * Intel's VMENTRY has a check on the 'G' bit.
+		 */
 		var->g = s->limit > 0xfffff;
-
-	/*
-	 * Work around a bug where the busy flag in the tr selector
-	 * isn't exposed
-	 */
-	if (seg == VCPU_SREG_TR)
+		break;
+	case VCPU_SREG_TR:
+		/*
+		 * Work around a bug where the busy flag in the tr selector
+		 * isn't exposed
+		 */
 		var->type |= 0x2;
+		break;
+	case VCPU_SREG_DS:
+	case VCPU_SREG_ES:
+	case VCPU_SREG_FS:
+	case VCPU_SREG_GS:
+		/*
+		 * The accessed bit must always be set in the segment
+		 * descriptor cache, although it can be cleared in the
+		 * descriptor, the cached bit always remains at 1. Since
+		 * Intel has a check on this, set it here to support
+		 * cross-vendor migration.
+		 */
+		if (!var->unusable)
+			var->type |= 0x1;
+		break;
+	}
 
 	var->unusable = !var->present;
 }
-- 
cgit v1.2.3


From c5bc22424021cabda862727fb3f5098b866f074d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 19 Feb 2009 12:18:56 +0100
Subject: KVM: MMU: Fix another largepage memory leak

In the paging_fetch function rmap_remove is called after setting a large
pte to non-present. This causes rmap_remove to not drop the reference to
the large page. The result is a memory leak of that page.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c0944c5..0f11792fafa 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -306,9 +306,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			continue;
 
 		if (is_large_pte(*sptep)) {
+			rmap_remove(vcpu->kvm, sptep);
 			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 			kvm_flush_remote_tlbs(vcpu->kvm);
-			rmap_remove(vcpu->kvm, sptep);
 		}
 
 		if (level == PT_DIRECTORY_LEVEL
-- 
cgit v1.2.3


From b0a1835d53c57bc38b36867c04436b60454cb610 Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Tue, 17 Feb 2009 16:52:08 +0800
Subject: KVM: ppc: Add emulation of E500 register mmucsr0

Latest kernel flushes TLB via mmucsr0.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Acked-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_emulate.c |  8 ++++++++
 arch/powerpc/kvm/e500_tlb.c     | 16 ++++++++++++++++
 arch/powerpc/kvm/e500_tlb.h     |  1 +
 3 files changed, 25 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 7a98d4a4c1e..3f760414b9f 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -105,6 +105,11 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_HID1:
 		vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
 
+	case SPRN_MMUCSR0:
+		emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
+				vcpu->arch.gpr[rs]);
+		break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
@@ -172,6 +177,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_HID1:
 		vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
 
+	case SPRN_MMUCSR0:
+		vcpu->arch.gpr[rt] = 0; break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d437160d388..72386ddbd9d 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -391,6 +391,22 @@ static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
 	return 0;
 }
 
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+	int esel;
+
+	if (value & MMUCSR0_TLB0FI)
+		for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
+	if (value & MMUCSR0_TLB1FI)
+		for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
+
+	_tlbil_all();
+
+	return EMULATE_DONE;
+}
+
 int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index ab49e935518..4d5cc0f7d79 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -44,6 +44,7 @@
 	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
 
 extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
 extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
 extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
 extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
-- 
cgit v1.2.3


From 2fa8937f3af8873e006ce324e7b2d3f9b2077b87 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Mon, 16 Feb 2009 15:14:48 +0800
Subject: ia64: Move the macro definitions related to MSI to one header file.

For kvm's MSI support, it needs these macros defined in ia64_msi.c, and
to avoid duplicate them, move them to one header file and share with
kvm.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/msidef.h | 42 ++++++++++++++++++++++++++++++++
 arch/ia64/kernel/msi_ia64.c    | 55 +++++++-----------------------------------
 2 files changed, 51 insertions(+), 46 deletions(-)
 create mode 100644 arch/ia64/include/asm/msidef.h

(limited to 'arch')

diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 00000000000..592c1047a0c
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define     MSI_DATA_VECTOR_SHIFT	0
+#define	    MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define     MSI_DATA_VECTOR_MASK	0xffffff00
+
+#define     MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define     MSI_DATA_DELIVERY_FIXED	(0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define     MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define     MSI_DATA_LEVEL_SHIFT	14
+#define     MSI_DATA_LEVEL_DEASSERT	(0 << MSI_DATA_LEVEL_SHIFT)
+#define     MSI_DATA_LEVEL_ASSERT	(1 << MSI_DATA_LEVEL_SHIFT)
+
+#define     MSI_DATA_TRIGGER_SHIFT	15
+#define     MSI_DATA_TRIGGER_EDGE	(0 << MSI_DATA_TRIGGER_SHIFT)
+#define     MSI_DATA_TRIGGER_LEVEL	(1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define     MSI_ADDR_DEST_ID_SHIFT	4
+#define     MSI_ADDR_HEADER		0xfee00000
+
+#define     MSI_ADDR_DEST_ID_MASK	0xfff0000f
+#define     MSI_ADDR_DEST_ID_CPU(cpu)	((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define     MSI_ADDR_DEST_MODE_SHIFT	2
+#define     MSI_ADDR_DEST_MODE_PHYS	(0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define	    MSI_ADDR_DEST_MODE_LOGIC	(1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define     MSI_ADDR_REDIRECTION_SHIFT	3
+#define     MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define     MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 89033933903..368ee4e5266 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -7,44 +7,7 @@
 #include <linux/msi.h>
 #include <linux/dmar.h>
 #include <asm/smp.h>
-
-/*
- * Shifts for APIC-based data
- */
-
-#define MSI_DATA_VECTOR_SHIFT		0
-#define	    MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
-#define MSI_DATA_VECTOR_MASK		0xffffff00
-
-#define MSI_DATA_DELIVERY_SHIFT		8
-#define     MSI_DATA_DELIVERY_FIXED	(0 << MSI_DATA_DELIVERY_SHIFT)
-#define     MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT		14
-#define     MSI_DATA_LEVEL_DEASSERT	(0 << MSI_DATA_LEVEL_SHIFT)
-#define     MSI_DATA_LEVEL_ASSERT	(1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT		15
-#define     MSI_DATA_TRIGGER_EDGE	(0 << MSI_DATA_TRIGGER_SHIFT)
-#define     MSI_DATA_TRIGGER_LEVEL	(1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for APIC-based bus address
- */
-
-#define MSI_TARGET_CPU_SHIFT		4
-#define MSI_ADDR_HEADER			0xfee00000
-
-#define MSI_ADDR_DESTID_MASK		0xfff0000f
-#define     MSI_ADDR_DESTID_CPU(cpu)	((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define MSI_ADDR_DESTMODE_SHIFT		2
-#define     MSI_ADDR_DESTMODE_PHYS	(0 << MSI_ADDR_DESTMODE_SHIFT)
-#define	    MSI_ADDR_DESTMODE_LOGIC	(1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT	3
-#define     MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define     MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+#include <asm/msidef.h>
 
 static struct irq_chip	ia64_msi_chip;
 
@@ -65,8 +28,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 	read_msi_msg(irq, &msg);
 
 	addr = msg.address_lo;
-	addr &= MSI_ADDR_DESTID_MASK;
-	addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+	addr &= MSI_ADDR_DEST_ID_MASK;
+	addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
 	msg.address_lo = addr;
 
 	data = msg.data;
@@ -98,9 +61,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 	msg.address_hi = 0;
 	msg.address_lo =
 		MSI_ADDR_HEADER |
-		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_DEST_MODE_PHYS |
 		MSI_ADDR_REDIRECTION_CPU |
-		MSI_ADDR_DESTID_CPU(dest_phys_id);
+		MSI_ADDR_DEST_ID_CPU(dest_phys_id);
 
 	msg.data =
 		MSI_DATA_TRIGGER_EDGE |
@@ -183,8 +146,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
-	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
 	irq_desc[irq].affinity = *mask;
@@ -215,9 +178,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 	msg->address_hi = 0;
 	msg->address_lo =
 		MSI_ADDR_HEADER |
-		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_DEST_MODE_PHYS |
 		MSI_ADDR_REDIRECTION_CPU |
-		MSI_ADDR_DESTID_CPU(dest);
+		MSI_ADDR_DEST_ID_CPU(dest);
 
 	msg->data =
 		MSI_DATA_TRIGGER_EDGE |
-- 
cgit v1.2.3


From 6b08035f3e64d8e474be166d682b52c95941662e Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Mon, 16 Feb 2009 15:24:05 +0800
Subject: KVM: ia64: Fix the build errors due to lack of macros related to MSI.

Include the newly introduced msidef.h to solve the build issues.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/irq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index c6786e8b1bf..c0785a72827 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,6 +23,8 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
+#include "lapic.h"
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	return 1;
-- 
cgit v1.2.3


From 401d10dee083bda281f2fdcdf654080313ba30ec Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Fri, 20 Feb 2009 22:53:37 +0530
Subject: KVM: VMX: Update necessary state when guest enters long mode

setup_msrs() should be called when entering long mode to save the
shadow state for the 64-bit guest state.

Using vmx_set_efer() in enter_lmode() removes some duplicated code
and also ensures we call setup_msrs(). We can safely pass the value
of shadow_efer to vmx_set_efer() as no other bits in the efer change
while enabling long mode (guest first sets EFER.LME, then sets CR0.PG
which causes a vmexit where we activate long mode).

With this fix, is_long_mode() can check for EFER.LMA set instead of
EFER.LME and 5e23049e86dd298b72e206b420513dbc3a240cd9 can be reverted.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 54 ++++++++++++++++++++++++------------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cb27ffccf46..48063a0aa24 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1430,6 +1430,29 @@ continue_rmode:
 	init_rmode(vcpu->kvm);
 }
 
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+	vcpu->arch.shadow_efer = efer;
+	if (!msr)
+		return;
+	if (efer & EFER_LMA) {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     vmcs_read32(VM_ENTRY_CONTROLS) |
+			     VM_ENTRY_IA32E_MODE);
+		msr->data = efer;
+	} else {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     vmcs_read32(VM_ENTRY_CONTROLS) &
+			     ~VM_ENTRY_IA32E_MODE);
+
+		msr->data = efer & ~EFER_LME;
+	}
+	setup_msrs(vmx);
+}
+
 #ifdef CONFIG_X86_64
 
 static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1444,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
 	}
-
 	vcpu->arch.shadow_efer |= EFER_LMA;
-
-	find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
-	vmcs_write32(VM_ENTRY_CONTROLS,
-		     vmcs_read32(VM_ENTRY_CONTROLS)
-		     | VM_ENTRY_IA32E_MODE);
+	vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1609,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
-	vcpu->arch.shadow_efer = efer;
-	if (!msr)
-		return;
-	if (efer & EFER_LMA) {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-				     vmcs_read32(VM_ENTRY_CONTROLS) |
-				     VM_ENTRY_IA32E_MODE);
-		msr->data = efer;
-
-	} else {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-				     vmcs_read32(VM_ENTRY_CONTROLS) &
-				     ~VM_ENTRY_IA32E_MODE);
-
-		msr->data = efer & ~EFER_LME;
-	}
-	setup_msrs(vmx);
-}
-
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-- 
cgit v1.2.3


From 41d6af119206e98764b4ae6d264d63acefcf851e Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@qumranet.com>
Date: Thu, 28 Feb 2008 16:06:15 +0530
Subject: KVM: is_long_mode() should check for EFER.LMA

is_long_mode currently checks the LongModeEnable bit in
EFER instead of the LongModeActive bit. This is wrong, but
we survived this till now since it wasn't triggered. This
breaks guests that go from long mode to compatibility mode.

This is noticed on a solaris guest and fixes bug #1842160

Signed-off-by: Amit Shah <amit.shah@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298..eaab2145f62 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 static inline int is_long_mode(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
-	return vcpu->arch.shadow_efer & EFER_LME;
+	return vcpu->arch.shadow_efer & EFER_LMA;
 #else
 	return 0;
 #endif
-- 
cgit v1.2.3


From d7364a29b305862cbc92b3f8b6ad9968270d068c Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sat, 21 Feb 2009 02:18:13 +0100
Subject: KVM: fix sparse warnings: context imbalance

Impact: Attribute function with __acquires(...) resp. __releases(...).

Fix this sparse warnings:
  arch/x86/kvm/i8259.c:34:13: warning: context imbalance in 'pic_lock' - wrong count at exit
  arch/x86/kvm/i8259.c:39:13: warning: context imbalance in 'pic_unlock' - unexpected unlock

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b4e662e94dd..1ccb50c74f1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
 #include <linux/kvm_host.h>
 
 static void pic_lock(struct kvm_pic *s)
+	__acquires(&s->lock)
 {
 	spin_lock(&s->lock);
 }
 
 static void pic_unlock(struct kvm_pic *s)
+	__releases(&s->lock)
 {
 	struct kvm *kvm = s->kvm;
 	unsigned acks = s->pending_acks;
-- 
cgit v1.2.3


From cded19f396bc3c9d299331709d0a9fbc6ecc148a Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Sat, 21 Feb 2009 02:19:13 +0100
Subject: KVM: fix sparse warnings: Should it be static?

Impact: Make symbols static.

Fix this sparse warnings:
  arch/x86/kvm/mmu.c:992:5: warning: symbol 'mmu_pages_add' was not declared. Should it be static?
  arch/x86/kvm/mmu.c:1124:5: warning: symbol 'mmu_pages_next' was not declared. Should it be static?
  arch/x86/kvm/mmu.c:1144:6: warning: symbol 'mmu_pages_clear_parents' was not declared. Should it be static?
  arch/x86/kvm/x86.c:2037:5: warning: symbol 'kvm_read_guest_virt' was not declared. Should it be static?
  arch/x86/kvm/x86.c:2067:5: warning: symbol 'kvm_write_guest_virt' was not declared. Should it be static?
  virt/kvm/irq_comm.c:220:5: warning: symbol 'setup_routing_entry' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 11 ++++++-----
 arch/x86/kvm/x86.c |  8 ++++----
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4a21b0f8491..2a36f7f7c4c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -989,8 +989,8 @@ struct kvm_mmu_pages {
 	     idx < 512;					\
 	     idx = find_next_bit(bitmap, 512, idx+1))
 
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
-		   int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+			 int idx)
 {
 	int i;
 
@@ -1121,8 +1121,9 @@ struct mmu_page_path {
 			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
 			i = mmu_pages_next(&pvec, &parents, i))
 
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
-		   int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+			  struct mmu_page_path *parents,
+			  int i)
 {
 	int n;
 
@@ -1141,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
 	return n;
 }
 
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
 	struct kvm_mmu_page *sp;
 	unsigned int level = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4db5be7c95..8ca100a9eca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2043,8 +2043,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 	return dev;
 }
 
-int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-			struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			       struct kvm_vcpu *vcpu)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
@@ -2073,8 +2073,8 @@ out:
 	return r;
 }
 
-int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
-			 struct kvm_vcpu *vcpu)
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+				struct kvm_vcpu *vcpu)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
-- 
cgit v1.2.3


From 4539b35881ae9664b0e2953438dd83f5ee02c0b4 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 12 Mar 2009 18:18:43 +0100
Subject: KVM: Fix missing smp tlb flush in invlpg

When kvm emulates an invlpg instruction, it can drop a shadow pte, but
leaves the guest tlbs intact.  This can cause memory corruption when
swapping out.

Without this the other cpu can still write to a freed host physical page.
tlb smp flush must happen if rmap_remove is called always before mmu_lock
is released because the VM will take the mmu_lock before it can finally add
the page to the freelist after swapout. mmu notifier makes it safe to flush
the tlb after freeing the page (otherwise it would never be safe) so we can do
a single flush for multiple sptes invalidated.

Cc: stable@kernel.org
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f11792fafa..6bd70206c56 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -446,6 +446,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
+	int need_flush = 0;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
@@ -465,6 +466,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 				rmap_remove(vcpu->kvm, sptep);
 				if (is_large_pte(*sptep))
 					--vcpu->kvm->stat.lpages;
+				need_flush = 1;
 			}
 			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 			break;
@@ -474,6 +476,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 			break;
 	}
 
+	if (need_flush)
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	if (pte_gpa == -1)
-- 
cgit v1.2.3


From bc35cbc85cd78213590761618a13da6a9707652c Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Tue, 17 Mar 2009 16:57:45 +0800
Subject: KVM: ppc: e500: Fix the bug that mas0 update to wrong value when read
 TLB entry

Should clear and then update the next victim area here.

Guest kernel only read TLB1 when startup kernel,
this bug result in an extra 4K TLB1 mapping in guest from 0x0 to 0x0.

As the problem has no impact to bootup a guest,
we didn't notice it before.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 72386ddbd9d..ec933209e8a 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -448,7 +448,7 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
 	esel = get_tlb_esel(vcpu_e500, tlbsel);
 
 	gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
-	vcpu_e500->mas0 &= MAS0_NV(0);
+	vcpu_e500->mas0 &= ~MAS0_NV(~0);
 	vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
 	vcpu_e500->mas1 = gtlbe->mas1;
 	vcpu_e500->mas2 = gtlbe->mas2;
-- 
cgit v1.2.3


From 046a48b35baa7c66d0d0331256ba12ca51665411 Mon Sep 17 00:00:00 2001
From: Liu Yu <yu.liu@freescale.com>
Date: Tue, 17 Mar 2009 16:57:46 +0800
Subject: KVM: ppc: e500: Fix the bug that KVM is unstable in SMP

TLB entry should enable memory coherence in SMP.

And like commit 631fba9dd3aca519355322cef035730609e91593,
remove guard attribute to enable the prefetch of guest memory.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c | 4 ++++
 arch/powerpc/kvm/e500_tlb.h | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index ec933209e8a..0e773fc2d5e 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -99,7 +99,11 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 
 static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
 {
+#ifdef CONFIG_SMP
+	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
+#else
 	return mas2 & MAS2_ATTRIB_MASK;
+#endif
 }
 
 /*
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 4d5cc0f7d79..45b064b7690 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -38,7 +38,7 @@
 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
 #define MAS2_ATTRIB_MASK \
-	  (MAS2_X0 | MAS2_X1 | MAS2_W | MAS2_I | MAS2_M | MAS2_G | MAS2_E)
+	  (MAS2_X0 | MAS2_X1)
 #define MAS3_ATTRIB_MASK \
 	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
 	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-- 
cgit v1.2.3


From 16175a796d061833aacfbd9672235f2d2725df65 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 22:13:44 +0200
Subject: KVM: VMX: Don't allow uninhibited access to EFER on i386

vmx_set_msr() does not allow i386 guests to touch EFER, but they can still
do so through the default: label in the switch.  If they set EFER_LME, they
can oops the host.

Fix by having EFER access through the normal channel (which will check for
EFER_LME) even on i386.

Reported-and-tested-by: Benjamin Gilbert <bgilbert@cs.cmu.edu>
Cc: stable@kernel.org
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 48063a0aa24..bb481330716 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -936,11 +936,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	int ret = 0;
 
 	switch (msr_index) {
-#ifdef CONFIG_X86_64
 	case MSR_EFER:
 		vmx_load_host_state(vmx);
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
 		break;
+#ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
-- 
cgit v1.2.3