aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/boot/vesa.h9
-rw-r--r--arch/x86/boot/video-vesa.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c4
-rw-r--r--arch/x86/kernel/aperture_64.c3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c39
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c42
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c3
-rw-r--r--arch/x86/kernel/e820_32.c26
-rw-r--r--arch/x86/kernel/e820_64.c27
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/i387.c10
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/mfgpt_32.c1
-rw-r--r--arch/x86/kernel/pci-dma_64.c5
-rw-r--r--arch/x86/kernel/ptrace.c19
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c24
-rw-r--r--arch/x86/kernel/setup64.c14
-rw-r--r--arch/x86/kernel/setup_32.c9
-rw-r--r--arch/x86/kernel/setup_64.c2
-rw-r--r--arch/x86/kernel/signal_32.c4
-rw-r--r--arch/x86/kernel/signal_64.c40
-rw-r--r--arch/x86/kernel/step.c4
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c56
-rw-r--r--arch/x86/kvm/paging_tmpl.h20
-rw-r--r--arch/x86/kvm/svm.c26
-rw-r--r--arch/x86/kvm/vmx.c21
-rw-r--r--arch/x86/kvm/x86.c114
-rw-r--r--arch/x86/lguest/boot.c55
-rw-r--r--arch/x86/mach-visws/traps.c5
-rw-r--r--arch/x86/mm/discontig_32.c1
-rw-r--r--arch/x86/mm/ioremap.c14
-rw-r--r--arch/x86/mm/numa_64.c11
-rw-r--r--arch/x86/mm/pageattr.c23
-rw-r--r--arch/x86/mm/pgtable_32.c18
-rw-r--r--arch/x86/pci/pcbios.c10
-rw-r--r--arch/x86/xen/setup.c3
41 files changed, 457 insertions, 241 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4a88cf7695b..6c70fed0f9a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,7 +21,8 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
- select HAVE_KVM
+ select HAVE_KRETPROBES
+ select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
config GENERIC_LOCKBREAK
@@ -65,9 +66,6 @@ config MMU
config ZONE_DMA
def_bool y
-config QUICKLIST
- def_bool X86_32
-
config SBUS
bool
@@ -1261,7 +1259,7 @@ menuconfig APM
machines with more than one CPU.
In order to use APM, you will need supporting software. For location
- and more information, read <file:Documentation/pm.txt> and the
+ and more information, read <file:Documentation/power/pm.txt> and the
Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6d50064db18..9304bfba7d4 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -388,7 +388,7 @@ config X86_OOSTORE
#
config X86_P6_NOP
def_bool y
- depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+ depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4)
config X86_TSC
def_bool y
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
index ff5b73cd406..468e444622c 100644
--- a/arch/x86/boot/vesa.h
+++ b/arch/x86/boot/vesa.h
@@ -26,17 +26,10 @@ struct vesa_general_info {
far_ptr video_mode_ptr; /* 14 */
u16 total_memory; /* 18 */
- u16 oem_software_rev; /* 20 */
- far_ptr oem_vendor_name_ptr; /* 22 */
- far_ptr oem_product_name_ptr; /* 26 */
- far_ptr oem_product_rev_ptr; /* 30 */
-
- u8 reserved[222]; /* 34 */
- u8 oem_data[256]; /* 256 */
+ u8 reserved[236]; /* 20 */
} __attribute__ ((packed));
#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
-#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24))
struct vesa_mode_info {
u16 mode_attr; /* 0 */
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 662dd2f1306..419b5c27337 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -37,8 +37,6 @@ static int vesa_probe(void)
video_vesa.modes = GET_HEAP(struct mode_info, 0);
- vginfo.signature = VBE2_MAGIC;
-
ax = 0x4f00;
di = (size_t)&vginfo;
asm(INT10
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1c0503bdfb1..5e7771a3ba2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -500,7 +500,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
regs->ss = __USER32_DS;
set_fs(USER_DS);
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
@@ -600,7 +600,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->ss = __USER32_DS;
set_fs(USER_DS);
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 608152a2a05..00df126169b 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -18,6 +18,7 @@
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/ioport.h>
+#include <linux/suspend.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/gart.h>
@@ -76,6 +77,8 @@ static u32 __init allocate_aperture(void)
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
aper_size >> 10, __pa(p));
insert_aperture_resource((u32)__pa(p), aper_size);
+ register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
+ (u32)__pa(p+aper_size) >> PAGE_SHIFT);
return (u32)__pa(p);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 39f8cb18296..c2f930d8664 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -55,7 +55,6 @@ static int eps_set_state(struct eps_cpu_data *centaur,
{
struct cpufreq_freqs freqs;
u32 lo, hi;
- u8 current_multiplier, current_voltage;
int err = 0;
int i;
@@ -95,6 +94,10 @@ postchange:
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+#ifdef DEBUG
+ {
+ u8 current_multiplier, current_voltage;
+
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
@@ -103,7 +106,8 @@ postchange:
current_multiplier = (lo >> 8) & 0xff;
printk(KERN_INFO "eps: Current multiplier = %d\n",
current_multiplier);
-
+ }
+#endif
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
return err;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index f2b5a621d27..8a85c93bd62 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
*/
static int speedstep_smi_ownership (void)
{
- u32 command, result, magic;
+ u32 command, result, magic, dummy;
u32 function = GET_SPEEDSTEP_OWNER;
unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void)
dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
__asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=D" (result)
+ "pop %%ebp\n"
+ : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+ "=S" (dummy)
: "a" (command), "b" (function), "c" (0), "d" (smi_port),
"D" (0), "S" (magic)
: "memory"
@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void)
*/
static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
{
- u32 command, result = 0, edi, high_mhz, low_mhz;
+ u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
u32 state=0;
u32 function = GET_SPEEDSTEP_FREQS;
@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
- __asm__ __volatile__("movl $0, %%edi\n"
+ __asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+ "pop %%ebp"
+ : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
);
dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
@@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
static int speedstep_get_state (void)
{
u32 function=GET_SPEEDSTEP_STATE;
- u32 result, state, edi, command;
+ u32 result, state, edi, command, dummy;
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
- __asm__ __volatile__("movl $0, %%edi\n"
+ __asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=a" (result), "=b" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+ "pop %%ebp\n"
+ : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
);
dprintk("state is %x, result is %x\n", state, result);
@@ -160,7 +167,7 @@ static int speedstep_get_state (void)
*/
static void speedstep_set_state (unsigned int state)
{
- unsigned int result = 0, command, new_state;
+ unsigned int result = 0, command, new_state, dummy;
unsigned long flags;
unsigned int function=SET_SPEEDSTEP_STATE;
unsigned int retry = 0;
@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state)
}
retry++;
__asm__ __volatile__(
- "movl $0, %%edi\n"
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=b" (new_state), "=D" (result)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+ "pop %%ebp"
+ : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
+ "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
);
} while ((new_state != state) && (retry <= SMI_TRIES));
@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state)
if (new_state == state) {
dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
} else {
- printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+ printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
}
return;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 103d61a59b1..3e18db4cefe 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void)
}
/**
- * Checks and updates an fixed-range MTRR if it differs from the value it
- * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also.
- * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
- * \param msr MSR address of the MTTR which should be checked and updated
- * \param changed pointer which indicates whether the MTRR needed to be changed
- * \param msrwords pointer to the MSR values which the MSR should have
+ * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
+ * @msr: MSR address of the MTTR which should be checked and updated
+ * @changed: pointer which indicates whether the MTRR needed to be changed
+ * @msrwords: pointer to the MSR values which the MSR should have
+ *
+ * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
+ * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
*/
static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
{
@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
}
}
+/**
+ * generic_get_free_region - Get a free MTRR.
+ * @base: The starting (base) address of the region.
+ * @size: The size (in bytes) of the region.
+ * @replace_reg: mtrr index to be replaced; set to invalid value if none.
+ *
+ * Returns: The index of the region on success, else negative on error.
+ */
int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
{
int i, max;
mtrr_type ltype;
@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
}
/**
- * Checks and updates the fixed-range MTRRs if they differ from the saved set
- * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
+ * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
*/
static int set_fixed_ranges(mtrr_type * frs)
{
@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
static u32 deftype_lo, deftype_hi;
+/**
+ * set_mtrr_state - Set the MTRR state for this CPU.
+ *
+ * NOTE: The CPU must already be in a safe state for MTRR changes.
+ * RETURNS: 0 if no changes made, else a mask indicating what was changed.
+ */
static unsigned long set_mtrr_state(void)
-/* [SUMMARY] Set the MTRR state for this CPU.
- <state> The MTRR state information to read.
- <ctxt> Some relevant CPU context.
- [NOTE] The CPU must already be in a safe state for MTRR changes.
- [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
{
unsigned int i;
unsigned long change_mask = 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index be83336fddb..a6450b3ae75 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
- add_memory_region(trim_start, trim_size, E820_RESERVED);
+ update_memory_range(trim_start, trim_size, E820_RAM,
+ E820_RESERVED);
update_e820();
return 1;
}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 4e16ef4a265..80444c5c9b1 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -749,6 +749,32 @@ static int __init parse_memmap(char *arg)
return 0;
}
early_param("memmap", parse_memmap);
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
void __init update_e820(void)
{
u8 nr_map;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 9f65b4cc323..9be69712601 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -744,6 +744,33 @@ void __init finish_e820_parsing(void)
}
}
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start && ei->size <= size) {
+ ei->type = new_type;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ }
+}
+
void __init update_e820(void)
{
u8 nr_map;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fd8ca53943a..74d87ea85b5 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -657,7 +657,7 @@ int_msg:
.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
fault_msg:
- .ascii \
+ .asciz \
/* fault info: */ "BUG: Int %d: CR2 %p\n" \
/* pusha regs: */ " EDI %p ESI %p EBP %p ESP %p\n" \
" EBX %p EDX %p ECX %p EAX %p\n" \
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 763dfc40723..d2e39e69aaf 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -132,7 +132,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
if (!cpu_has_fxsr)
return -ENODEV;
- unlazy_fpu(target);
+ init_fpu(target);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.i387.fxsave, 0, -1);
@@ -147,7 +147,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!cpu_has_fxsr)
return -ENODEV;
- unlazy_fpu(target);
+ init_fpu(target);
set_stopped_child_used_math(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -261,7 +261,7 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env,
}
#else
env->fip = fxsave->fip;
- env->fcs = fxsave->fcs;
+ env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
env->foo = fxsave->foo;
env->fos = fxsave->fos;
#endif
@@ -307,7 +307,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
if (!HAVE_HWFP)
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- unlazy_fpu(target);
+ init_fpu(target);
if (!cpu_has_fxsr)
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -332,7 +332,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!HAVE_HWFP)
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- unlazy_fpu(target);
+ init_fpu(target);
set_stopped_child_used_math(target);
if (!cpu_has_fxsr)
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index c706a306155..5921e5f0a64 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
},
{
.callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion dv6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B8")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
.ident = "HP Pavilion tx1000",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 027fc067b39..b402c0f3f19 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -30,6 +30,7 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
#include <asm/geode.h>
static struct mfgpt_timer_t {
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index a82473d192a..375cb2bc45b 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -53,11 +53,6 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
int node;
node = dev_to_node(dev);
- if (node == -1)
- node = numa_node_id();
-
- if (node < first_node(node_online_map))
- node = first_node(node_online_map);
page = alloc_pages_node(node, gfp, order);
return page ? page_address(page) : NULL;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f41fdc98efb..d5904eef1d3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -323,6 +323,16 @@ static int putreg(struct task_struct *child,
return set_flags(child, value);
#ifdef CONFIG_X86_64
+ /*
+ * Orig_ax is really just a flag with small positive and
+ * negative values, so make sure to always sign-extend it
+ * from 32 bits so that it works correctly regardless of
+ * whether we come from a 32-bit environment or not.
+ */
+ case offsetof(struct user_regs_struct, orig_ax):
+ value = (long) (s32) value;
+ break;
+
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
@@ -1045,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
R32(esi, si);
R32(ebp, bp);
R32(eax, ax);
- R32(orig_eax, orig_ax);
R32(eip, ip);
R32(esp, sp);
+ case offsetof(struct user32, regs.orig_eax):
+ /*
+ * Sign-extend the value so that orig_eax = -1
+ * causes (long)orig_ax < 0 tests to fire correctly.
+ */
+ regs->orig_ax = (long) (s32) value;
+ break;
+
case offsetof(struct user32, regs.eflags):
return set_flags(child, value);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index c47208fc593..d89a648fe71 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -363,6 +363,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
nvidia_force_enable_hpet);
/* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260,
+ nvidia_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
nvidia_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7fd6ac43e4a..484c4a80d38 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -152,6 +152,24 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
},
},
+ { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 745",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+ },
+ },
+ { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 745",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+ },
+ },
{ /* Handle problems with rebooting on Dell 2400's */
.callback = set_bios_reboot,
.ident = "Dell PowerEdge 2400",
@@ -326,6 +344,10 @@ static inline void kb_wait(void)
}
}
+void __attribute__((weak)) mach_reboot_fixups(void)
+{
+}
+
static void native_machine_emergency_restart(void)
{
int i;
@@ -337,6 +359,8 @@ static void native_machine_emergency_restart(void)
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
case BOOT_KBD:
+ mach_reboot_fixups(); /* for board specific fixups */
+
for (i = 0; i < 10; i++) {
kb_wait();
udelay(50);
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 309366f8f60..e24c4567709 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -142,14 +142,16 @@ void __init setup_per_cpu_areas(void)
printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
for_each_cpu_mask (i, cpu_possible_map) {
char *ptr;
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ ptr = alloc_bootmem_pages(size);
+#else
+ int node = early_cpu_to_node(i);
- if (!NODE_DATA(early_cpu_to_node(i))) {
- printk("cpu with no node %d, num_online_nodes %d\n",
- i, num_online_nodes());
+ if (!node_online(node) || !NODE_DATA(node))
ptr = alloc_bootmem_pages(size);
- } else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
- }
+ else
+ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+#endif
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
cpu_pda(i)->data_offset = ptr - __per_cpu_start;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index a1d7071a51c..2b3e5d45176 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void)
*/
min_low_pfn = PFN_UP(init_pg_tables_end);
- find_max_pfn();
-
max_low_pfn = find_max_low_pfn();
#ifdef CONFIG_HIGHMEM
@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_init();
- max_low_pfn = setup_memory();
-
/* update e820 for memory not covered by WB MTRRs */
+ find_max_pfn();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
- max_low_pfn = setup_memory();
+ find_max_pfn();
+
+ max_low_pfn = setup_memory();
#ifdef CONFIG_VMI
/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7637dc91c79..f4f7ecfb898 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void)
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
+ if (node == NUMA_NO_NODE || !node_online(node))
node = first_node(node_online_map);
numa_set_node(cpu, node);
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index caee1f002fe..0157a6f0f41 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -407,7 +407,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~TF_MASK;
+ regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
@@ -500,7 +500,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~TF_MASK;
+ regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 7347bb14e30..1c83e5124c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -295,7 +295,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
see include/asm-x86_64/uaccess.h for details. */
set_fs(USER_DS);
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#ifdef DEBUG_SIG
@@ -311,6 +311,35 @@ give_sigsegv:
}
/*
+ * Return -1L or the syscall number that @regs is executing.
+ */
+static long current_syscall(struct pt_regs *regs)
+{
+ /*
+ * We always sign-extend a -1 value being set here,
+ * so this is always either -1L or a syscall number.
+ */
+ return regs->orig_ax;
+}
+
+/*
+ * Return a value that is -EFOO if the system call in @regs->orig_ax
+ * returned an error. This only works for @regs from @current.
+ */
+static long current_syscall_ret(struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ return (int) regs->ax;
+#endif
+ return regs->ax;
+}
+
+/*
* OK, we're invoking a handler
*/
@@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
#endif
/* Are we from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (current_syscall(regs) >= 0) {
/* If so, check system call restarting.. */
- switch (regs->ax) {
+ switch (current_syscall_ret(regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (current_syscall(regs) >= 0) {
/* Restart the system call - no handlers present */
- long res = regs->ax;
- switch (res) {
+ switch (current_syscall_ret(regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 2ef1a5f8d67..9d406cdc847 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block)
child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
} else {
write_debugctlmsr(child,
- child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+ child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
if (!child->thread.debugctlmsr)
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
@@ -189,7 +189,7 @@ void user_disable_single_step(struct task_struct *child)
* Make sure block stepping (BTF) is disabled.
*/
write_debugctlmsr(child,
- child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+ child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
if (!child->thread.debugctlmsr)
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2cbee9479ce..68a6b151193 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -647,6 +647,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->timer.divide_count;
atomic_set(&apic->timer.pending, 0);
+
+ if (!apic->timer.period)
+ return;
+
hrtimer_start(&apic->timer.dev,
ktime_add_ns(now, apic->timer.period),
HRTIMER_MODE_ABS);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8efdcdbebb0..e55af12e11b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,8 +222,7 @@ static int is_io_pte(unsigned long pte)
static int is_rmap_pte(u64 pte)
{
- return pte != shadow_trap_nonpresent_pte
- && pte != shadow_notrap_nonpresent_pte;
+ return is_shadow_present_pte(pte);
}
static gfn_t pse36_gfn_delta(u32 gpte)
@@ -681,8 +680,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
unsigned level,
int metaphysical,
unsigned access,
- u64 *parent_pte,
- bool *new_page)
+ u64 *parent_pte)
{
union kvm_mmu_page_role role;
unsigned index;
@@ -722,8 +720,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
vcpu->arch.mmu.prefetch_page(vcpu, sp);
if (!metaphysical)
rmap_write_protect(vcpu->kvm, gfn);
- if (new_page)
- *new_page = 1;
return sp;
}
@@ -876,11 +872,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
+ struct page *page;
+
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
if (gpa == UNMAPPED_GVA)
return NULL;
- return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+ down_read(&current->mm->mmap_sem);
+ page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ up_read(&current->mm->mmap_sem);
+
+ return page;
}
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
@@ -889,14 +892,25 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
int *ptwrite, gfn_t gfn, struct page *page)
{
u64 spte;
- int was_rmapped = is_rmap_pte(*shadow_pte);
+ int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
+ hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
__FUNCTION__, *shadow_pte, pt_access,
write_fault, user_fault, gfn);
+ if (is_rmap_pte(*shadow_pte)) {
+ if (host_pfn != page_to_pfn(page)) {
+ pgprintk("hfn old %lx new %lx\n",
+ host_pfn, page_to_pfn(page));
+ rmap_remove(vcpu->kvm, shadow_pte);
+ }
+ else
+ was_rmapped = 1;
+ }
+
/*
* We don't set the accessed bit, since we sometimes want to see
* whether the guest actually used the pte (in order to detect
@@ -999,8 +1013,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, ACC_ALL, &table[index],
- NULL);
+ 1, ACC_ALL, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
kvm_release_page_clean(page);
@@ -1020,15 +1033,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
struct page *page;
+ down_read(&vcpu->kvm->slots_lock);
+
down_read(&current->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, gfn);
+ up_read(&current->mm->mmap_sem);
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
r = __nonpaging_map(vcpu, v, write, gfn, page);
spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return r;
}
@@ -1090,7 +1106,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
+ PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
@@ -1111,7 +1127,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu),
- ACC_ALL, NULL, NULL);
+ ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -1172,7 +1188,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
- pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+ pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
mmu_free_roots(vcpu);
}
@@ -1362,6 +1378,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn;
int r;
u64 gpte = 0;
+ struct page *page;
if (bytes != 4 && bytes != 8)
return;
@@ -1389,8 +1406,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (!is_present_pte(gpte))
return;
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+ down_read(&current->mm->mmap_sem);
+ page = gfn_to_page(vcpu->kvm, gfn);
+ up_read(&current->mm->mmap_sem);
+
vcpu->arch.update_pte.gfn = gfn;
- vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+ vcpu->arch.update_pte.page = page;
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1496,9 +1518,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
gpa_t gpa;
int r;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 03ba8608fe0..ecc0856268c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
pt_element_t *table;
struct page *page;
+ down_read(&current->mm->mmap_sem);
page = gfn_to_page(kvm, table_gfn);
+ up_read(&current->mm->mmap_sem);
+
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -140,7 +143,7 @@ walk:
}
#endif
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+ (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
pt_access = ACC_ALL;
@@ -297,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
- bool new_page = 0;
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
if (level == PT_PAGE_TABLE_LEVEL)
@@ -319,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, access,
- shadow_ent, &new_page);
- if (new_page && !metaphysical) {
+ shadow_ent);
+ if (!metaphysical) {
int r;
pt_element_t curr_pte;
r = kvm_read_guest_atomic(vcpu->kvm,
@@ -378,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (r)
return r;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
/*
* Look up the shadow pte for the faulting address.
*/
@@ -392,11 +394,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
pgprintk("%s: guest page fault\n", __FUNCTION__);
inject_page_fault(vcpu, addr, walker.error_code);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return 0;
}
+ down_read(&current->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, walker.gfn);
+ up_read(&current->mm->mmap_sem);
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
@@ -413,14 +417,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
*/
if (shadow_pte && is_io_pte(*shadow_pte)) {
spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return 1;
}
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return write_pt;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index de755cb1431..1a582f1090e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -792,6 +792,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vcpu->arch.cr0 = cr0;
cr0 |= X86_CR0_PG | X86_CR0_WP;
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ if (!vcpu->fpu_active) {
+ svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+ cr0 |= X86_CR0_TS;
+ }
svm->vmcb->save.cr0 = cr0;
}
@@ -1096,6 +1100,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_SYSENTER_ESP:
*data = svm->vmcb->save.sysenter_esp;
break;
+ /* Nobody will change the following 5 values in the VMCB so
+ we can safely return them on rdmsr. They will always be 0
+ until LBRV is implemented. */
+ case MSR_IA32_DEBUGCTLMSR:
+ *data = svm->vmcb->save.dbgctl;
+ break;
+ case MSR_IA32_LASTBRANCHFROMIP:
+ *data = svm->vmcb->save.br_from;
+ break;
+ case MSR_IA32_LASTBRANCHTOIP:
+ *data = svm->vmcb->save.br_to;
+ break;
+ case MSR_IA32_LASTINTFROMIP:
+ *data = svm->vmcb->save.last_excp_from;
+ break;
+ case MSR_IA32_LASTINTTOIP:
+ *data = svm->vmcb->save.last_excp_to;
+ break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
@@ -1156,6 +1178,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
case MSR_IA32_SYSENTER_ESP:
svm->vmcb->save.sysenter_esp = data;
break;
+ case MSR_IA32_DEBUGCTLMSR:
+ pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+ __FUNCTION__, data);
+ break;
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad36447e696..8e1462880d1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -349,8 +349,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
static void reload_tss(void)
{
-#ifndef CONFIG_X86_64
-
/*
* VT restores TR but not its size. Useless.
*/
@@ -361,7 +359,6 @@ static void reload_tss(void)
descs = (void *)gdt.base;
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
load_TR_desc();
-#endif
}
static void load_transition_efer(struct vcpu_vmx *vmx)
@@ -638,6 +635,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
{
int save_nmsrs;
+ vmx_load_host_state(vmx);
save_nmsrs = 0;
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu)) {
@@ -1435,7 +1433,7 @@ static int init_rmode_tss(struct kvm *kvm)
int ret = 0;
int r;
- down_read(&current->mm->mmap_sem);
+ down_read(&kvm->slots_lock);
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -1458,7 +1456,7 @@ static int init_rmode_tss(struct kvm *kvm)
ret = 1;
out:
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->slots_lock);
return ret;
}
@@ -1477,7 +1475,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
- down_write(&current->mm->mmap_sem);
+ down_write(&kvm->slots_lock);
if (kvm->arch.apic_access_page)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1487,9 +1485,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
if (r)
goto out;
+
+ down_read(&current->mm->mmap_sem);
kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+ up_read(&current->mm->mmap_sem);
out:
- up_write(&current->mm->mmap_sem);
+ up_write(&kvm->slots_lock);
return r;
}
@@ -1602,9 +1603,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
- if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
- if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
- return -ENOMEM;
return 0;
}
@@ -2534,6 +2532,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
put_cpu();
if (err)
goto free_vmcs;
+ if (vm_need_virtualize_apic_accesses(kvm))
+ if (alloc_apic_access_page(kvm) != 0)
+ goto free_vmcs;
return &vmx->vcpu;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf530814868..6b01552bd1f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,9 @@
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries);
+
struct kvm_x86_ops *kvm_x86_ops;
struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -181,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
int ret;
u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
offset * sizeof(u64), sizeof(pdpte));
if (ret < 0) {
@@ -198,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
out:
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return ret;
}
@@ -212,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
if (is_long_mode(vcpu) || !is_pae(vcpu))
return false;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
if (r < 0)
goto out;
changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
out:
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return changed;
}
@@ -356,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
*/
}
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
/*
* Does the new cr3 value map to physical memory? (Note, we
* catch an invalid cr3 even in real-mode, because it would
@@ -372,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vcpu->arch.cr3 = cr3;
vcpu->arch.mmu.new_cr3(vcpu);
}
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
}
EXPORT_SYMBOL_GPL(set_cr3);
@@ -484,6 +487,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
__FUNCTION__, data);
break;
+ case MSR_IA32_MCG_CTL:
+ pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
+ __FUNCTION__, data);
+ break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case 0x200 ... 0x2ff: /* MTRRs */
@@ -526,6 +533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MC0_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MCG_CAP:
+ case MSR_IA32_MCG_CTL:
case MSR_IA32_MC0_MISC:
case MSR_IA32_MC0_MISC+4:
case MSR_IA32_MC0_MISC+8:
@@ -727,6 +735,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_GET_SUPPORTED_CPUID: {
+ struct kvm_cpuid2 __user *cpuid_arg = argp;
+ struct kvm_cpuid2 cpuid;
+
+ r = -EFAULT;
+ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+ goto out;
+ r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
+ cpuid_arg->entries);
+ if (r)
+ goto out;
+
+ r = -EFAULT;
+ if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+ goto out;
+ r = 0;
+ break;
+ }
default:
r = -EINVAL;
}
@@ -974,8 +1000,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
put_cpu();
}
-static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
- struct kvm_cpuid2 *cpuid,
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
@@ -1207,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
- down_write(&current->mm->mmap_sem);
+ down_write(&kvm->slots_lock);
kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
- up_write(&current->mm->mmap_sem);
+ up_write(&kvm->slots_lock);
return 0;
}
@@ -1261,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
< alias->target_phys_addr)
goto out;
- down_write(&current->mm->mmap_sem);
+ down_write(&kvm->slots_lock);
p = &kvm->arch.aliases[alias->slot];
p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1275,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
kvm_mmu_zap_all(kvm);
- up_write(&current->mm->mmap_sem);
+ up_write(&kvm->slots_lock);
return 0;
@@ -1351,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int is_dirty = 0;
- down_write(&current->mm->mmap_sem);
+ down_write(&kvm->slots_lock);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
@@ -1367,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
}
r = 0;
out:
- up_write(&current->mm->mmap_sem);
+ up_write(&kvm->slots_lock);
return r;
}
@@ -1487,24 +1512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_SUPPORTED_CPUID: {
- struct kvm_cpuid2 __user *cpuid_arg = argp;
- struct kvm_cpuid2 cpuid;
-
- r = -EFAULT;
- if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
- goto out;
- r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
- cpuid_arg->entries);
- if (r)
- goto out;
-
- r = -EFAULT;
- if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
- goto out;
- r = 0;
- break;
- }
default:
;
}
@@ -1563,7 +1570,7 @@ int emulator_read_std(unsigned long addr,
void *data = val;
int r = X86EMUL_CONTINUE;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
@@ -1585,7 +1592,7 @@ int emulator_read_std(unsigned long addr,
addr += tocopy;
}
out:
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return r;
}
EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1604,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
}
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1644,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
{
int ret;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0) {
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return 0;
}
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
return 1;
}
@@ -1663,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_io_device *mmio_dev;
gpa_t gpa;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
if (gpa == UNMAPPED_GVA) {
kvm_inject_page_fault(vcpu, addr, 2);
@@ -1742,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
char *kaddr;
u64 val;
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA ||
@@ -1753,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
goto emul_write;
val = *(u64 *)new;
+
+ down_read(&current->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ up_read(&current->mm->mmap_sem);
+
kaddr = kmap_atomic(page, KM_USER0);
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
emul_write:
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
}
#endif
@@ -2152,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
kvm_x86_ops->skip_emulated_instruction(vcpu);
for (i = 0; i < nr_pages; ++i) {
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
page = gva_to_page(vcpu, address + i * PAGE_SIZE);
vcpu->arch.pio.guest_pages[i] = page;
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
if (!page) {
kvm_inject_gp(vcpu, 0);
free_pio_guest_pages(vcpu);
@@ -2478,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
down_read(&current->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
- vcpu->arch.apic->vapic_page = page;
up_read(&current->mm->mmap_sem);
+
+ vcpu->arch.apic->vapic_page = page;
}
static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -2861,8 +2873,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
- vcpu->arch.cr0 = sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
+ vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
@@ -2952,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa_t gpa;
vcpu_load(vcpu);
- down_read(&current->mm->mmap_sem);
+ down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
- up_read(&current->mm->mmap_sem);
+ up_read(&vcpu->kvm->slots_lock);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
@@ -3227,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
*/
if (!user_alloc) {
if (npages && !old.rmap) {
+ down_write(&current->mm->mmap_sem);
memslot->userspace_addr = do_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS,
0);
+ up_write(&current->mm->mmap_sem);
if (IS_ERR((void *)memslot->userspace_addr))
return PTR_ERR((void *)memslot->userspace_addr);
@@ -3239,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
if (!old.user_alloc && old.rmap) {
int ret;
+ down_write(&current->mm->mmap_sem);
ret = do_munmap(current->mm, old.userspace_addr,
old.npages * PAGE_SIZE);
+ up_write(&current->mm->mmap_sem);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cccb38a5965..a104c532ff7 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
};
-static cycle_t clock_base;
/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a
* ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
case 1: /* Basic feature request. */
/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
*cx &= 0x00002201;
- /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
- *dx &= 0x07808101;
+ /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+ *dx &= 0x07808111;
/* The Host can do a nice optimization if it knows that the
* kernel mappings (addresses above 0xC0000000 or whatever
* PAGE_OFFSET is set to) haven't changed. But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
*pmdp = pmdval;
lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1)), 0);
+ (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
}
/* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
return lguest_data.time.tv_sec;
}
+/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at,
+ * or 0 if it's unusable as a reliable clock source. This matches what we want
+ * here: if we return 0 from this function, the x86 TSC clock will not register
+ * itself. */
+static unsigned long lguest_cpu_khz(void)
+{
+ return lguest_data.tsc_khz;
+}
+
+/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
+ * we read the time value given to us by the Host. */
static cycle_t lguest_clock_read(void)
{
unsigned long sec, nsec;
- /* If the Host tells the TSC speed, we can trust that. */
- if (lguest_data.tsc_khz)
- return native_read_tsc();
-
- /* If we can't use the TSC, we read the time value written by the Host.
- * Since it's in two parts (seconds and nanoseconds), we risk reading
- * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
- * getting 99 and 0. As Linux tends to come apart under the stress of
- * time travel, we must be careful: */
+ /* Since the time is in two parts (seconds and nanoseconds), we risk
+ * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
+ * and getting 99 and 0. As Linux tends to come apart under the stress
+ * of time travel, we must be careful: */
do {
/* First we read the seconds part. */
sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
/* Now if the seconds part has changed, try again. */
} while (unlikely(lguest_data.time.tv_sec != sec));
- /* Our non-TSC clock is in real nanoseconds. */
+ /* Our lguest clock is in real nanoseconds. */
return sec*1000000000ULL + nsec;
}
-/* This is what we tell the kernel is our clocksource. */
+/* This is the fallback clocksource: lower priority than the TSC clocksource. */
static struct clocksource lguest_clock = {
.name = "lguest",
- .rating = 400,
+ .rating = 200,
.read = lguest_clock_read,
.mask = CLOCKSOURCE_MASK(64),
.mult = 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
- return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
-
/* We also need a "struct clock_event_device": Linux asks us to set it to go
* off some time in the future. Actually, James Morris figured all this out, I
* just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
/* Set up the timer interrupt (0) to go to our simple timer routine */
set_irq_handler(0, lguest_time_irq);
- /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
- * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
- * Either way, the "rating" is set so high that it's always chosen over
- * any other clocksource. */
- if (lguest_data.tsc_khz)
- lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
- lguest_clock.shift);
- clock_base = lguest_clock_read();
clocksource_register(&lguest_clock);
- /* Now we've set up our clock, we can use it as the scheduler clock */
- pv_time_ops.sched_clock = lguest_sched_clock;
-
/* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */
lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
/* time operations */
pv_time_ops.get_wallclock = lguest_get_wallclock;
pv_time_ops.time_init = lguest_time_init;
+ pv_time_ops.get_cpu_khz = lguest_cpu_khz;
/* Now is a good time to look at the implementations of these functions
* before returning to the rest of lguest_init(). */
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
index 843b67acf43..bfac6ba10f8 100644
--- a/arch/x86/mach-visws/traps.c
+++ b/arch/x86/mach-visws/traps.c
@@ -46,8 +46,9 @@ static __init void cobalt_init(void)
*/
set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
setup_local_APIC();
- printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n",
- apic_read(APIC_LVR), apic_read(APIC_ID));
+ printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+ (unsigned int)apic_read(APIC_LVR),
+ (unsigned int)apic_read(APIC_ID));
set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index c394ca0720b..8e25e06ff73 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundary between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
- find_max_pfn();
get_memcfg_numa();
kva_pages = calculate_numa_remap_pages();
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac3c959e271..794895c6dcc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,7 +106,7 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
enum ioremap_mode mode)
{
unsigned long pfn, offset, last_addr, vaddr;
@@ -134,12 +134,14 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
return NULL;
}
- WARN_ON_ONCE(page_is_ram(pfn));
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
- prot = PAGE_KERNEL_NOCACHE;
+ /*
+ * FIXME: we will use UC MINUS for now, as video fb drivers
+ * depend on it. Upcoming ioremap_wc() will fix this behavior.
+ */
+ prot = PAGE_KERNEL_UC_MINUS;
break;
case IOR_MODE_CACHED:
prot = PAGE_KERNEL;
@@ -195,13 +197,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
*
* Must be freed with iounmap.
*/
-void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
}
EXPORT_SYMBOL(ioremap_nocache);
-void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
return __ioremap(phys_addr, size, IOR_MODE_CACHED);
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 59898fb0a4a..16b82ad34b9 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -221,8 +221,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem((unsigned long)node_data[nodeid],
- pgdat_size);
+ free_bootmem(nodedata_phys, pgdat_size);
node_data[nodeid] = NULL;
return;
}
@@ -622,13 +621,17 @@ void __init init_cpu_to_node(void)
int i;
for (i = 0; i < NR_CPUS; i++) {
+ int node;
u16 apicid = x86_cpu_to_apicid_init[i];
if (apicid == BAD_APICID)
continue;
- if (apicid_to_node[apicid] == NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (node == NUMA_NO_NODE)
continue;
- numa_set_node(i, apicid_to_node[apicid]);
+ if (!node_online(node))
+ continue;
+ numa_set_node(i, node);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7049294fb46..7b79f6be4e7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -26,7 +26,6 @@ struct cpa_data {
pgprot_t mask_set;
pgprot_t mask_clr;
int numpages;
- int processed;
int flushtlb;
unsigned long pfn;
};
@@ -291,8 +290,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
*/
nextpage_addr = (address + psize) & pmask;
numpages = (nextpage_addr - address) >> PAGE_SHIFT;
- if (numpages < cpa->processed)
- cpa->processed = numpages;
+ if (numpages < cpa->numpages)
+ cpa->numpages = numpages;
/*
* We are safe now. Check whether the new pgprot is the same:
@@ -319,7 +318,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
*/
addr = address + PAGE_SIZE;
pfn++;
- for (i = 1; i < cpa->processed; i++, addr += PAGE_SIZE, pfn++) {
+ for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
if (pgprot_val(chk_prot) != pgprot_val(new_prot))
@@ -343,7 +342,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* that we limited the number of possible pages already to
* the number of pages in the large page.
*/
- if (address == (nextpage_addr - psize) && cpa->processed == numpages) {
+ if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
/*
* The address is aligned and the number of pages
* covers the full page.
@@ -573,7 +572,7 @@ repeat:
set_pte_atomic(kpte, new_pte);
cpa->flushtlb = 1;
}
- cpa->processed = 1;
+ cpa->numpages = 1;
return 0;
}
@@ -584,7 +583,7 @@ repeat:
do_split = try_preserve_large_page(kpte, address, cpa);
/*
* When the range fits into the existing large page,
- * return. cp->processed and cpa->tlbflush have been updated in
+ * return. cp->numpages and cpa->tlbflush have been updated in
* try_large_page:
*/
if (do_split <= 0)
@@ -663,7 +662,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
* Store the remaining nr of pages for the large page
* preservation check.
*/
- cpa->numpages = cpa->processed = numpages;
+ cpa->numpages = numpages;
ret = __change_page_attr(cpa, checkalias);
if (ret)
@@ -680,9 +679,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
* CPA operation. Either a large page has been
* preserved or a single page update happened.
*/
- BUG_ON(cpa->processed > numpages);
- numpages -= cpa->processed;
- cpa->vaddr += cpa->processed * PAGE_SIZE;
+ BUG_ON(cpa->numpages > numpages);
+ numpages -= cpa->numpages;
+ cpa->vaddr += cpa->numpages * PAGE_SIZE;
}
return 0;
}
@@ -772,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
int set_memory_uc(unsigned long addr, int numpages)
{
return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PCD | _PAGE_PWT));
+ __pgprot(_PAGE_PCD));
}
EXPORT_SYMBOL(set_memory_uc);
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 73aba712520..2f9e9afcb9f 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- mm->pgd = pgd; /* so that alloc_pd can use it */
+ /* so that alloc_pd can use it */
+ mm->pgd = pgd;
+ if (pgd)
+ pgd_ctor(pgd);
if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
- quicklist_free(0, pgd_dtor, pgd);
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
pgd = NULL;
}
@@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
pgd_mop_up_pmds(mm, pgd);
- quicklist_free(0, pgd_dtor, pgd);
-}
-
-void check_pgt_cache(void)
-{
- quicklist_trim(0, pgd_dtor, 25, 16);
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
}
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 10ac8c316c4..2f7109ac4c1 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -198,6 +198,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
"b" (bx),
"D" ((long)reg),
"S" (&pci_indirect));
+ /*
+ * Zero-extend the result beyond 8 bits, do not trust the
+ * BIOS having done it:
+ */
+ *value &= 0xff;
break;
case 2:
__asm__("lcall *(%%esi); cld\n\t"
@@ -210,6 +215,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
"b" (bx),
"D" ((long)reg),
"S" (&pci_indirect));
+ /*
+ * Zero-extend the result beyond 16 bits, do not trust the
+ * BIOS having done it:
+ */
+ *value &= 0xffff;
break;
case 4:
__asm__("lcall *(%%esi); cld\n\t"
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3bad4773a2f..2341492bf7a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -38,7 +38,8 @@ char * __init xen_memory_setup(void)
unsigned long max_pfn = xen_start_info->nr_pages;
e820.nr_map = 0;
- add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
return "Xen";
}