aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-10-05 12:07:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-10-05 12:07:39 -0700
commit19d031e052bc213cdcbee70696d476136994b8c1 (patch)
tree906e78a07d15e0154692f82489387e3ad7f3f94b
parente8d809c61325a2f799dc753b0ac72ace6958b92c (diff)
parent3da0dd433dc399a8c0124d0614d82a09b6a49bce (diff)
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: add support for change_pte mmu notifiers KVM: MMU: add SPTE_HOST_WRITEABLE flag to the shadow ptes KVM: MMU: dont hold pagecount reference for mapped sptes pages KVM: Prevent overflow in KVM_GET_SUPPORTED_CPUID KVM: VMX: flush TLB with INVEPT on cpu migration KVM: fix LAPIC timer period overflow KVM: s390: fix memsize >= 4G KVM: SVM: Handle tsc in svm_get_msr/svm_set_msr correctly KVM: SVM: Fix tsc offset adjustment when running nested
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c84
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--virt/kvm/kvm_main.c14
9 files changed, 120 insertions, 30 deletions
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ec5eee7c25d..06cce8285ba 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -58,7 +58,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
-static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
+static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
{
return vcpu->arch.sie_block->gmslm
- vcpu->arch.sie_block->gmsor
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fa..d83892226f7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb..7024224f0fc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+ apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->divide_count;
atomic_set(&apic->lapic_timer.pending, 0);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f45..685a4ffac8e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
- kvm_release_pfn_dirty(pfn);
- else
- kvm_release_pfn_clean(pfn);
+ kvm_set_pfn_dirty(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
return write_protected;
}
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
{
u64 *spte;
int need_tlb_flush = 0;
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
return need_tlb_flush;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+{
+ int need_flush = 0;
+ u64 *spte, new_spte;
+ pte_t *ptep = (pte_t *)data;
+ pfn_t new_pfn;
+
+ WARN_ON(pte_huge(*ptep));
+ new_pfn = pte_pfn(*ptep);
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ BUG_ON(!is_shadow_present_pte(*spte));
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+ need_flush = 1;
+ if (pte_write(*ptep)) {
+ rmap_remove(kvm, spte);
+ __set_spte(spte, shadow_trap_nonpresent_pte);
+ spte = rmap_next(kvm, rmapp, NULL);
+ } else {
+ new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+ new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+ new_spte &= ~PT_WRITABLE_MASK;
+ new_spte &= ~SPTE_HOST_WRITEABLE;
+ if (is_writeble_pte(*spte))
+ kvm_set_pfn_dirty(spte_to_pfn(*spte));
+ __set_spte(spte, new_spte);
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+ }
+ if (need_flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ u64 data))
{
int i, j;
int retval = 0;
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+ retval |= handler(kvm, &memslot->rmap[gfn_offset],
+ data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
retval |= handler(kvm,
- &memslot->lpage_info[j][idx].rmap_pde);
+ &memslot->lpage_info[j][idx].rmap_pde,
+ data);
}
}
}
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
{
u64 *spte;
int young = 0;
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
gfn = unalias_gfn(vcpu->kvm, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
}
#ifdef MMU_DEBUG
@@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
- bool can_unsync)
+ bool can_unsync, bool reset_host_protection)
{
u64 spte;
int ret = 0;
@@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
+ if (reset_host_protection)
+ spte |= SPTE_HOST_WRITEABLE;
+
spte |= (u64)pfn << PAGE_SHIFT;
if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int level, gfn_t gfn,
- pfn_t pfn, bool speculative)
+ pfn_t pfn, bool speculative,
+ bool reset_host_protection)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
- dirty, level, gfn, pfn, speculative, true)) {
+ dirty, level, gfn, pfn, speculative, true,
+ reset_host_protection)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
page_header_update_slot(vcpu->kvm, sptep, gfn);
if (!was_rmapped) {
rmap_count = rmap_add(vcpu, sptep, gfn);
- if (!is_rmap_spte(*sptep))
- kvm_release_pfn_clean(pfn);
+ kvm_release_pfn_clean(pfn);
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
@@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
- level, gfn, pfn, false);
+ level, gfn, pfn, false, true);
++vcpu->stat.pf_fixed;
break;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d2..72558f8ff3f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
return;
kvm_get_pfn(pfn);
+ /*
+ * we call mmu_set_spte() with reset_host_protection = true beacuse that
+ * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+ */
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
- gpte_to_gfn(gpte), pfn, true);
+ gpte_to_gfn(gpte), pfn, true, true);
}
/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, level,
- gw->gfn, pfn, false);
+ gw->gfn, pfn, false, true);
break;
}
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
+ bool reset_host_protection;
offset = nr_present = 0;
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
+ pte_access &= ~ACC_WRITE_MASK;
+ reset_host_protection = 0;
+ } else {
+ reset_host_protection = 1;
+ }
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
- spte_to_pfn(sp->spt[i]), true, false);
+ spte_to_pfn(sp->spt[i]), true, false,
+ reset_host_protection);
}
return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3..c17404add91 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdtscll(tsc_this);
delta = vcpu->arch.host_tsc - tsc_this;
svm->vmcb->control.tsc_offset += delta;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset;
+
+ if (is_nested(svm))
+ tsc_offset = svm->nested.hsave->control.tsc_offset;
+ else
+ tsc_offset = svm->vmcb->control.tsc_offset;
- rdtscll(tsc);
- *data = svm->vmcb->control.tsc_offset + tsc;
+ *data = tsc_offset + native_read_tsc();
break;
}
case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset = data - native_read_tsc();
+ u64 g_tsc_offset = 0;
+
+ if (is_nested(svm)) {
+ g_tsc_offset = svm->vmcb->control.tsc_offset -
+ svm->nested.hsave->control.tsc_offset;
+ svm->nested.hsave->control.tsc_offset = tsc_offset;
+ }
+
+ svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
- rdtscll(tsc);
- svm->vmcb->control.tsc_offset = data - tsc;
break;
}
case MSR_K6_STAR:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0..ed53b42caba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
kvm_migrate_timers(vcpu);
- vpid_sync_vcpu_all(vmx);
+ set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
local_irq_disable();
list_add(&vmx->local_vcpus_link,
&per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee4424..9b9695322f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
if (cpuid->nent < 1)
goto out;
+ if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+ cpuid->nent = KVM_MAX_CPUID_ENTRIES;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e79c54034bc..b7c78a403dc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -850,6 +850,19 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
}
+static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address,
+ pte_t pte)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+ spin_lock(&kvm->mmu_lock);
+ kvm->mmu_notifier_seq++;
+ kvm_set_spte_hva(kvm, address, pte);
+ spin_unlock(&kvm->mmu_lock);
+}
+
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
@@ -929,6 +942,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */