41 files changed, 457 insertions, 241 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4a88cf7695b..6c70fed0f9a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,7 +21,8 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
-	select HAVE_KVM
+	select HAVE_KRETPROBES
+	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 
 
 config GENERIC_LOCKBREAK
@@ -65,9 +66,6 @@ config MMU
 config ZONE_DMA
 	def_bool y
 
-config QUICKLIST
-	def_bool X86_32
-
 config SBUS
 	bool
 
@@ -1261,7 +1259,7 @@ menuconfig APM
 	  machines with more than one CPU.
 
 	  In order to use APM, you will need supporting software. For location
-	  and more information, read <file:Documentation/pm.txt> and the
+	  and more information, read <file:Documentation/power/pm.txt> and the
 	  Battery Powered Linux mini-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6d50064db18..9304bfba7d4 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -388,7 +388,7 @@ config X86_OOSTORE
 #
 config X86_P6_NOP
 	def_bool y
-	depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+	depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4)
 
 config X86_TSC
 	def_bool y
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
index ff5b73cd406..468e444622c 100644
--- a/arch/x86/boot/vesa.h
+++ b/arch/x86/boot/vesa.h
@@ -26,17 +26,10 @@ struct vesa_general_info {
 	far_ptr video_mode_ptr;	/* 14 */
 	u16 total_memory;	/* 18 */
 
-	u16 oem_software_rev;	/* 20 */
-	far_ptr oem_vendor_name_ptr;	/* 22 */
-	far_ptr oem_product_name_ptr;	/* 26 */
-	far_ptr oem_product_rev_ptr;	/* 30 */
-
-	u8 reserved[222];	/* 34 */
-	u8 oem_data[256];	/* 256 */
+	u8 reserved[236];	/* 20 */
 } __attribute__ ((packed));
 
 #define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24))
-#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24))
 
 struct vesa_mode_info {
 	u16 mode_attr;		/* 0 */
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 662dd2f1306..419b5c27337 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -37,8 +37,6 @@ static int vesa_probe(void)
 
 	video_vesa.modes = GET_HEAP(struct mode_info, 0);
 
-	vginfo.signature = VBE2_MAGIC;
-
 	ax = 0x4f00;
 	di = (size_t)&vginfo;
 	asm(INT10
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1c0503bdfb1..5e7771a3ba2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -500,7 +500,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->flags &= ~X86_EFLAGS_TF;
+	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
@@ -600,7 +600,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->ss = __USER32_DS;
 
 	set_fs(USER_DS);
-	regs->flags &= ~X86_EFLAGS_TF;
+	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 608152a2a05..00df126169b 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/bitops.h>
 #include <linux/ioport.h>
+#include <linux/suspend.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/gart.h>
@@ -76,6 +77,8 @@ static u32 __init allocate_aperture(void)
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, __pa(p));
 	insert_aperture_resource((u32)__pa(p), aper_size);
+	register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
+				(u32)__pa(p+aper_size) >> PAGE_SHIFT);
 
 	return (u32)__pa(p);
 }
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 39f8cb18296..c2f930d8664 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -55,7 +55,6 @@ static int eps_set_state(struct eps_cpu_data *centaur,
 {
 	struct cpufreq_freqs freqs;
 	u32 lo, hi;
-	u8 current_multiplier, current_voltage;
 	int err = 0;
 	int i;
 
@@ -95,6 +94,10 @@ postchange:
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
 
+#ifdef DEBUG
+	{
+	u8 current_multiplier, current_voltage;
+
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
@@ -103,7 +106,8 @@ postchange:
 	current_multiplier = (lo >> 8) & 0xff;
 	printk(KERN_INFO "eps: Current multiplier = %d\n",
 		current_multiplier);
-
+	}
+#endif
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	return err;
 }
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index f2b5a621d27..8a85c93bd62 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  */
 static int speedstep_smi_ownership (void)
 {
-	u32 command, result, magic;
+	u32 command, result, magic, dummy;
 	u32 function = GET_SPEEDSTEP_OWNER;
 	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
 
@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void)
 	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
 
 	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=D" (result)
+		"pop %%ebp\n"
+		: "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+			"=S" (dummy)
 		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
 			"D" (0), "S" (magic)
 		: "memory"
@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void)
  */
 static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 {
-	u32 command, result = 0, edi, high_mhz, low_mhz;
+	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
 	u32 state=0;
 	u32 function = GET_SPEEDSTEP_FREQS;
 
@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 
 	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
 
-	__asm__ __volatile__("movl $0, %%edi\n"
+	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
-		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+		"pop %%ebp"
+		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
 	);
 
 	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
@@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
 static int speedstep_get_state (void)
 {
 	u32 function=GET_SPEEDSTEP_STATE;
-	u32 result, state, edi, command;
+	u32 result, state, edi, command, dummy;
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
 	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
 
-	__asm__ __volatile__("movl $0, %%edi\n"
+	__asm__ __volatile__(
+		"push %%ebp\n"
 		"out %%al, (%%dx)\n"
-		: "=a" (result), "=b" (state), "=D" (edi)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+		"pop %%ebp\n"
+		: "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
 	);
 
 	dprintk("state is %x, result is %x\n", state, result);
@@ -160,7 +167,7 @@ static int speedstep_get_state (void)
  */
 static void speedstep_set_state (unsigned int state)
 {
-	unsigned int result = 0, command, new_state;
+	unsigned int result = 0, command, new_state, dummy;
 	unsigned long flags;
 	unsigned int function=SET_SPEEDSTEP_STATE;
 	unsigned int retry = 0;
@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state)
 		}
 		retry++;
 		__asm__ __volatile__(
-			"movl $0, %%edi\n"
+			"push %%ebp\n"
 			"out %%al, (%%dx)\n"
-			: "=b" (new_state), "=D" (result)
-			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+			"pop %%ebp"
+			: "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
+				"=d" (dummy), "=S" (dummy)
+			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
 			);
 	} while ((new_state != state) && (retry <= SMI_TRIES));
 
@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state)
 	if (new_state == state) {
 		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
 	} else {
-		printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+		printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
 	}
 
 	return;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 103d61a59b1..3e18db4cefe 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void)
 }
 
 /**
- * Checks and updates an fixed-range MTRR if it differs from the value it
- * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also.
- * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
- * \param msr MSR address of the MTTR which should be checked and updated
- * \param changed pointer which indicates whether the MTRR needed to be changed
- * \param msrwords pointer to the MSR values which the MSR should have
+ * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
+ * @msr: MSR address of the MTRR which should be checked and updated
+ * @changed: pointer which indicates whether the MTRR needed to be changed
+ * @msrwords: pointer to the MSR values which the MSR should have
+ *
+ * If K8 extensions are wanted, update the K8 SYSCFG MSR also.
+ * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
  */
 static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 {
@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 	}
 }
 
+/**
+ * generic_get_free_region - Get a free MTRR.
+ * @base: The starting (base) address of the region.
+ * @size: The size (in bytes) of the region.
+ * @replace_reg: mtrr index to be replaced; set to invalid value if none.
+ *
+ * Returns: The index of the region on success, else negative on error.
+ */
 int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/*  [SUMMARY] Get a free MTRR.
-    <base> The starting (base) address of the region.
-    <size> The size (in bytes) of the region.
-    [RETURNS] The index of the region on success, else -1 on error.
-*/
 {
 	int i, max;
 	mtrr_type ltype;
@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 }
 
 /**
- * Checks and updates the fixed-range MTRRs if they differ from the saved set
- * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
+ * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
  */
 static int set_fixed_ranges(mtrr_type * frs)
 {
@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 
 static u32 deftype_lo, deftype_hi;
 
+/**
+ * set_mtrr_state - Set the MTRR state for this CPU.
+ *
+ * NOTE: The CPU must already be in a safe state for MTRR changes.
+ * RETURNS: 0 if no changes made, else a mask indicating what was changed.
+ */
 static unsigned long set_mtrr_state(void)
-/*  [SUMMARY] Set the MTRR state for this CPU.
-    <state> The MTRR state information to read.
-    <ctxt> Some relevant CPU context.
-    [NOTE] The CPU must already be in a safe state for MTRR changes.
-    [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
 {
 	unsigned int i;
 	unsigned long change_mask = 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index be83336fddb..a6450b3ae75 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 		trim_size = end_pfn;
 		trim_size <<= PAGE_SHIFT;
 		trim_size -= trim_start;
-		add_memory_region(trim_start, trim_size, E820_RESERVED);
+		update_memory_range(trim_start, trim_size, E820_RAM,
+					E820_RESERVED);
 		update_e820();
 		return 1;
 	}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 4e16ef4a265..80444c5c9b1 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -749,6 +749,32 @@ static int __init parse_memmap(char *arg)
 	return 0;
 }
 early_param("memmap", parse_memmap);
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	int i;
+
+	BUG_ON(old_type == new_type);
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+		if (ei->type != old_type)
+			continue;
+		/* entry lies entirely inside [start, start + size): retype it */
+		if (ei->addr >= start && ei->addr + ei->size <= start + size) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partial overlap: add only the intersection as new_type */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		add_memory_region(final_start, final_end - final_start,
+					 new_type);
+	}
+}
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 9f65b4cc323..9be69712601 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -744,6 +744,33 @@ void __init finish_e820_parsing(void)
 	}
 }
 
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	int i;
+
+	BUG_ON(old_type == new_type);
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+		if (ei->type != old_type)
+			continue;
+		/* entry lies entirely inside [start, start + size): retype it */
+		if (ei->addr >= start && ei->addr + ei->size <= start + size) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partial overlap: add only the intersection as new_type */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		add_memory_region(final_start, final_end - final_start,
+					 new_type);
+	}
+}
+
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fd8ca53943a..74d87ea85b5 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -657,7 +657,7 @@ int_msg:
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 
 fault_msg:
-	.ascii								\
+	.asciz								\
 /* fault info: */	"BUG: Int %d: CR2 %p\n"				\
 /* pusha regs: */	"     EDI %p  ESI %p  EBP %p  ESP %p\n"		\
 			"     EBX %p  EDX %p  ECX %p  EAX %p\n"		\
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 763dfc40723..d2e39e69aaf 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -132,7 +132,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	unlazy_fpu(target);
+	init_fpu(target);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.i387.fxsave, 0, -1);
@@ -147,7 +147,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	unlazy_fpu(target);
+	init_fpu(target);
 	set_stopped_child_used_math(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -261,7 +261,7 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env,
 	}
 #else
 	env->fip = fxsave->fip;
-	env->fcs = fxsave->fcs;
+	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
 	env->foo = fxsave->foo;
 	env->fos = fxsave->fos;
 #endif
@@ -307,7 +307,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!HAVE_HWFP)
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-	unlazy_fpu(target);
+	init_fpu(target);
 
 	if (!cpu_has_fxsr)
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -332,7 +332,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!HAVE_HWFP)
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-	unlazy_fpu(target);
+	init_fpu(target);
 	set_stopped_child_used_math(target);
 
 	if (!cpu_has_fxsr)
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index c706a306155..5921e5f0a64 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
+		.ident		= "HP Pavilion dv6000",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B8")
+		}
+	},
+	{
+		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion tx1000",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 027fc067b39..b402c0f3f19 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -30,6 +30,7 @@
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <asm/geode.h>
 
 static struct mfgpt_timer_t {
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index a82473d192a..375cb2bc45b 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -53,11 +53,6 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 	int node;
 
 	node = dev_to_node(dev);
-	if (node == -1)
-		node = numa_node_id();
-
-	if (node < first_node(node_online_map))
-		node = first_node(node_online_map);
 
 	page = alloc_pages_node(node, gfp, order);
 	return page ? page_address(page) : NULL;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f41fdc98efb..d5904eef1d3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -323,6 +323,16 @@ static int putreg(struct task_struct *child,
 		return set_flags(child, value);
 
 #ifdef CONFIG_X86_64
+	/*
+	 * Orig_ax is really just a flag with small positive and
+	 * negative values, so make sure to always sign-extend it
+	 * from 32 bits so that it works correctly regardless of
+	 * whether we come from a 32-bit environment or not.
+	 */
+	case offsetof(struct user_regs_struct, orig_ax):
+		value = (long) (s32) value;
+		break;
+
 	case offsetof(struct user_regs_struct,fs_base):
 		if (value >= TASK_SIZE_OF(child))
 			return -EIO;
@@ -1045,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 	R32(esi, si);
 	R32(ebp, bp);
 	R32(eax, ax);
-	R32(orig_eax, orig_ax);
 	R32(eip, ip);
 	R32(esp, sp);
 
+	case offsetof(struct user32, regs.orig_eax):
+		/*
+		 * Sign-extend the value so that orig_eax = -1
+		 * causes (long)orig_ax < 0 tests to fire correctly.
+		 */
+		regs->orig_ax = (long) (s32) value;
+		break;
+
 	case offsetof(struct user32, regs.eflags):
 		return set_flags(child, value);
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index c47208fc593..d89a648fe71 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -363,6 +363,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
 			nvidia_force_enable_hpet);
 
 /* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260,
+			nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
 			nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7fd6ac43e4a..484c4a80d38 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -152,6 +152,24 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
+	{       /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+		},
+	},
+	{       /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
@@ -326,6 +344,10 @@ static inline void kb_wait(void)
 	}
 }
 
+void __attribute__((weak)) mach_reboot_fixups(void)
+{
+}
+
 static void native_machine_emergency_restart(void)
 {
 	int i;
@@ -337,6 +359,8 @@ static void native_machine_emergency_restart(void)
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) {
 		case BOOT_KBD:
+			mach_reboot_fixups(); /* for board specific fixups */
+
 			for (i = 0; i < 10; i++) {
 				kb_wait();
 				udelay(50);
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 309366f8f60..e24c4567709 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -142,14 +142,16 @@ void __init setup_per_cpu_areas(void)
 	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
 	for_each_cpu_mask (i, cpu_possible_map) {
 		char *ptr;
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+		ptr = alloc_bootmem_pages(size);
+#else
+		int node = early_cpu_to_node(i);
 
-		if (!NODE_DATA(early_cpu_to_node(i))) {
-			printk("cpu with no node %d, num_online_nodes %d\n",
-			       i, num_online_nodes());
+		if (!node_online(node) || !NODE_DATA(node))
 			ptr = alloc_bootmem_pages(size);
-		} else { 
-			ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
-		}
+		else
+			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+#endif
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index a1d7071a51c..2b3e5d45176 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void)
 	 */
 	min_low_pfn = PFN_UP(init_pg_tables_end);
 
-	find_max_pfn();
-
 	max_low_pfn = find_max_low_pfn();
 
 #ifdef CONFIG_HIGHMEM
@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
-	max_low_pfn = setup_memory();
-
 	/* update e820 for memory not covered by WB MTRRs */
+	find_max_pfn();
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
-		max_low_pfn = setup_memory();
+		find_max_pfn();
+
+	max_low_pfn = setup_memory();
 
 #ifdef CONFIG_VMI
 	/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7637dc91c79..f4f7ecfb898 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void)
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
 	node = apicid_to_node[apicid];
-	if (node == NUMA_NO_NODE)
+	if (node == NUMA_NO_NODE || !node_online(node))
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index caee1f002fe..0157a6f0f41 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -407,7 +407,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	 * The tracer may want to single-step inside the
 	 * handler too.
 	 */
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
@@ -500,7 +500,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 * The tracer may want to single-step inside the
 	 * handler too.
 	 */
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 7347bb14e30..1c83e5124c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -295,7 +295,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	   see include/asm-x86_64/uaccess.h for details. */
 	set_fs(USER_DS);
 
-	regs->flags &= ~X86_EFLAGS_TF;
+	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
 #ifdef DEBUG_SIG
@@ -311,6 +311,35 @@ give_sigsegv:
 }
 
 /*
+ * Return -1L or the syscall number that @regs is executing.
+ */
+static long current_syscall(struct pt_regs *regs)
+{
+	/*
+	 * We always sign-extend a -1 value being set here,
+	 * so this is always either -1L or a syscall number.
+	 */
+	return regs->orig_ax;
+}
+
+/*
+ * Return a value that is -EFOO if the system call in @regs->orig_ax
+ * returned an error.  This only works for @regs from @current.
+ */
+static long current_syscall_ret(struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32))
+		/*
+		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+		 * and will match correctly in comparisons.
+		 */
+		return (int) regs->ax;
+#endif
+	return regs->ax;
+}
+
+/*
  * OK, we're invoking a handler
  */	
 
@@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 #endif
 
 	/* Are we from a system call? */
-	if ((long)regs->orig_ax >= 0) {
+	if (current_syscall(regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (regs->ax) {
+		switch (current_syscall_ret(regs)) {
 		        case -ERESTART_RESTARTBLOCK:
 			case -ERESTARTNOHAND:
 				regs->ax = -EINTR;
@@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if ((long)regs->orig_ax >= 0) {
+	if (current_syscall(regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		long res = regs->ax;
-		switch (res) {
+		switch (current_syscall_ret(regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 2ef1a5f8d67..9d406cdc847 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block)
 				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
 	} else {
 	    write_debugctlmsr(child,
-			      child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+			      child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
 
 	    if (!child->thread.debugctlmsr)
 		    clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
@@ -189,7 +189,7 @@ void user_disable_single_step(struct task_struct *child)
 	 * Make sure block stepping (BTF) is disabled.
 	 */
 	write_debugctlmsr(child,
-			  child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+			  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
 
 	if (!child->thread.debugctlmsr)
 		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2cbee9479ce..68a6b151193 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -647,6 +647,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
 	atomic_set(&apic->timer.pending, 0);
+
+	if (!apic->timer.period)
+		return;
+
 	hrtimer_start(&apic->timer.dev,
 		      ktime_add_ns(now, apic->timer.period),
 		      HRTIMER_MODE_ABS);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8efdcdbebb0..e55af12e11b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,8 +222,7 @@ static int is_io_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
-	return pte != shadow_trap_nonpresent_pte
-		&& pte != shadow_notrap_nonpresent_pte;
+	return is_shadow_present_pte(pte);
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
@@ -681,8 +680,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     unsigned level,
 					     int metaphysical,
 					     unsigned access,
-					     u64 *parent_pte,
-					     bool *new_page)
+					     u64 *parent_pte)
 {
 	union kvm_mmu_page_role role;
 	unsigned index;
@@ -722,8 +720,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
-	if (new_page)
-		*new_page = 1;
 	return sp;
 }
 
@@ -876,11 +872,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	struct page *page;
+
 	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
-	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+
+	return page;
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
@@ -889,14 +892,25 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 int *ptwrite, gfn_t gfn, struct page *page)
 {
 	u64 spte;
-	int was_rmapped = is_rmap_pte(*shadow_pte);
+	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
 		 __FUNCTION__, *shadow_pte, pt_access,
 		 write_fault, user_fault, gfn);
 
+	if (is_rmap_pte(*shadow_pte)) {
+		if (host_pfn != page_to_pfn(page)) {
+			pgprintk("hfn old %lx new %lx\n",
+				 host_pfn, page_to_pfn(page));
+			rmap_remove(vcpu->kvm, shadow_pte);
+		}
+		else
+			was_rmapped = 1;
+	}
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -999,8 +1013,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 				>> PAGE_SHIFT;
 			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
 						     v, level - 1,
-						     1, ACC_ALL, &table[index],
-						     NULL);
+						     1, ACC_ALL, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
 				kvm_release_page_clean(page);
@@ -1020,15 +1033,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	struct page *page;
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __nonpaging_map(vcpu, v, write, gfn, page);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -1090,7 +1106,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 		ASSERT(!VALID_PAGE(root));
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
+				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
@@ -1111,7 +1127,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 			root_gfn = 0;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, !is_paging(vcpu),
-				      ACC_ALL, NULL, NULL);
+				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
@@ -1172,7 +1188,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
 	mmu_free_roots(vcpu);
 }
 
@@ -1362,6 +1378,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
+	struct page *page;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -1389,8 +1406,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	if (!is_present_pte(gpte))
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
+
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+	vcpu->arch.update_pte.page = page;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1496,9 +1518,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 03ba8608fe0..ecc0856268c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
 	pt_element_t *table;
 	struct page *page;
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
+	up_read(&current->mm->mmap_sem);
+
 	table = kmap_atomic(page, KM_USER0);
 
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -140,7 +143,7 @@ walk:
 	}
 #endif
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	pt_access = ACC_ALL;
 
@@ -297,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		u64 shadow_pte;
 		int metaphysical;
 		gfn_t table_gfn;
-		bool new_page = 0;
 
 		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 		if (level == PT_PAGE_TABLE_LEVEL)
@@ -319,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		}
 		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
 					       metaphysical, access,
-					       shadow_ent, &new_page);
-		if (new_page && !metaphysical) {
+					       shadow_ent);
+		if (!metaphysical) {
 			int r;
 			pt_element_t curr_pte;
 			r = kvm_read_guest_atomic(vcpu->kvm,
@@ -378,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (r)
 		return r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -392,11 +394,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
@@ -413,14 +417,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return write_pt;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index de755cb1431..1a582f1090e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -792,6 +792,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+	if (!vcpu->fpu_active) {
+		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+		cr0 |= X86_CR0_TS;
+	}
 	svm->vmcb->save.cr0 = cr0;
 }
 
@@ -1096,6 +1100,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_IA32_SYSENTER_ESP:
 		*data = svm->vmcb->save.sysenter_esp;
 		break;
+	/* Nobody will change the following 5 values in the VMCB so
+	   we can safely return them on rdmsr. They will always be 0
+	   until LBRV is implemented. */
+	case MSR_IA32_DEBUGCTLMSR:
+		*data = svm->vmcb->save.dbgctl;
+		break;
+	case MSR_IA32_LASTBRANCHFROMIP:
+		*data = svm->vmcb->save.br_from;
+		break;
+	case MSR_IA32_LASTBRANCHTOIP:
+		*data = svm->vmcb->save.br_to;
+		break;
+	case MSR_IA32_LASTINTFROMIP:
+		*data = svm->vmcb->save.last_excp_from;
+		break;
+	case MSR_IA32_LASTINTTOIP:
+		*data = svm->vmcb->save.last_excp_to;
+		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
 	}
@@ -1156,6 +1178,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		svm->vmcb->save.sysenter_esp = data;
 		break;
+	case MSR_IA32_DEBUGCTLMSR:
+		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+				__FUNCTION__, data);
+		break;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad36447e696..8e1462880d1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -349,8 +349,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
 static void reload_tss(void)
 {
-#ifndef CONFIG_X86_64
-
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
@@ -361,7 +359,6 @@ static void reload_tss(void)
 	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
-#endif
 }
 
 static void load_transition_efer(struct vcpu_vmx *vmx)
@@ -638,6 +635,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs;
 
+	vmx_load_host_state(vmx);
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu)) {
@@ -1435,7 +1433,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int ret = 0;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&kvm->slots_lock);
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -1458,7 +1456,7 @@ static int init_rmode_tss(struct kvm *kvm)
 
 	ret = 1;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&kvm->slots_lock);
 	return ret;
 }
 
@@ -1477,7 +1475,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1487,9 +1485,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
 	if (r)
 		goto out;
+
+	down_read(&current->mm->mmap_sem);
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+	up_read(&current->mm->mmap_sem);
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
@@ -1602,9 +1603,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
-	if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
-		if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
-			return -ENOMEM;
 
 	return 0;
 }
@@ -2534,6 +2532,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	put_cpu();
 	if (err)
 		goto free_vmcs;
+	if (vm_need_virtualize_apic_accesses(kvm))
+		if (alloc_apic_access_page(kvm) != 0)
+			goto free_vmcs;
 
 	return &vmx->vcpu;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf530814868..6b01552bd1f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,9 @@
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+				    struct kvm_cpuid_entry2 __user *entries);
+
 struct kvm_x86_ops *kvm_x86_ops;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -181,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	int ret;
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
 				  offset * sizeof(u64), sizeof(pdpte));
 	if (ret < 0) {
@@ -198,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return ret;
 }
@@ -212,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 		return false;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	if (r < 0)
 		goto out;
 	changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return changed;
 }
@@ -356,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		 */
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -372,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		vcpu->arch.cr3 = cr3;
 		vcpu->arch.mmu.new_cr3(vcpu);
 	}
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -484,6 +487,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
 			__FUNCTION__, data);
 		break;
+	case MSR_IA32_MCG_CTL:
+		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
+			__FUNCTION__, data);
+		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
 	case 0x200 ... 0x2ff: /* MTRRs */
@@ -526,6 +533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MC0_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MCG_CAP:
+	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MC0_MISC:
 	case MSR_IA32_MC0_MISC+4:
 	case MSR_IA32_MC0_MISC+8:
@@ -727,6 +735,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_SUPPORTED_CPUID: {
+		struct kvm_cpuid2 __user *cpuid_arg = argp;
+		struct kvm_cpuid2 cpuid;
+
+		r = -EFAULT;
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+			goto out;
+		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
+			cpuid_arg->entries);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -974,8 +1000,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	put_cpu();
 }
 
-static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
-				    struct kvm_cpuid2 *cpuid,
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
@@ -1207,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return 0;
 }
 
@@ -1261,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1275,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
 	kvm_mmu_zap_all(kvm);
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 
 	return 0;
 
@@ -1351,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
@@ -1367,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
@@ -1487,24 +1512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_SUPPORTED_CPUID: {
-		struct kvm_cpuid2 __user *cpuid_arg = argp;
-		struct kvm_cpuid2 cpuid;
-
-		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
-			goto out;
-		r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
-			cpuid_arg->entries);
-		if (r)
-			goto out;
-
-		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
-			goto out;
-		r = 0;
-		break;
-	}
 	default:
 		;
 	}
@@ -1563,7 +1570,7 @@ int emulator_read_std(unsigned long addr,
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
@@ -1585,7 +1592,7 @@ int emulator_read_std(unsigned long addr,
 		addr += tocopy;
 	}
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1604,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr,
 		return X86EMUL_CONTINUE;
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	/* For APIC access vmexit */
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1644,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	int ret;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0) {
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return 1;
 }
 
@@ -1663,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	if (gpa == UNMAPPED_GVA) {
 		kvm_inject_page_fault(vcpu, addr, 2);
@@ -1742,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		char *kaddr;
 		u64 val;
 
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
 		if (gpa == UNMAPPED_GVA ||
@@ -1753,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 			goto emul_write;
 
 		val = *(u64 *)new;
+
+		down_read(&current->mm->mmap_sem);
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+		up_read(&current->mm->mmap_sem);
+
 		kaddr = kmap_atomic(page, KM_USER0);
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
 		kunmap_atomic(kaddr, KM_USER0);
 		kvm_release_page_dirty(page);
 	emul_write:
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 	}
 #endif
 
@@ -2152,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
 	for (i = 0; i < nr_pages; ++i) {
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 		vcpu->arch.pio.guest_pages[i] = page;
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		if (!page) {
 			kvm_inject_gp(vcpu, 0);
 			free_pio_guest_pages(vcpu);
@@ -2478,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	vcpu->arch.apic->vapic_page = page;
 	up_read(&current->mm->mmap_sem);
+
+	vcpu->arch.apic->vapic_page = page;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -2861,8 +2873,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 
 	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
-	vcpu->arch.cr0 = sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
+	vcpu->arch.cr0 = sregs->cr0;
 
 	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
@@ -2952,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	gpa_t gpa;
 
 	vcpu_load(vcpu);
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -3227,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
+			down_write(&current->mm->mmap_sem);
 			memslot->userspace_addr = do_mmap(NULL, 0,
 						     npages * PAGE_SIZE,
 						     PROT_READ | PROT_WRITE,
 						     MAP_SHARED | MAP_ANONYMOUS,
 						     0);
+			up_write(&current->mm->mmap_sem);
 
 			if (IS_ERR((void *)memslot->userspace_addr))
 				return PTR_ERR((void *)memslot->userspace_addr);
@@ -3239,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 			if (!old.user_alloc && old.rmap) {
 				int ret;
 
+				down_write(&current->mm->mmap_sem);
 				ret = do_munmap(current->mm, old.userspace_addr,
 						old.npages * PAGE_SIZE);
+				up_write(&current->mm->mmap_sem);
 				if (ret < 0)
 					printk(KERN_WARNING
 				       "kvm_vm_ioctl_set_memory_region: "
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cccb38a5965..a104c532ff7 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -84,7 +84,6 @@ struct lguest_data lguest_data = {
 	.blocked_interrupts = { 1 }, /* Block timer interrupts */
 	.syscall_vec = SYSCALL_VECTOR,
 };
-static cycle_t clock_base;
 
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
  * ring buffer of stored hypercalls which the Host will run though next time we
@@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
-		*dx &= 0x07808101;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+		*dx &= 0x07808111;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	*pmdp = pmdval;
 	lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
-		   (__pa(pmdp)&(PAGE_SIZE-1)), 0);
+		   (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
 }
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void)
 	return lguest_data.time.tv_sec;
 }
 
+/* The TSC is a Time Stamp Counter.  The Host tells us what speed it runs at,
+ * or 0 if it's unusable as a reliable clock source.  This matches what we want
+ * here: if we return 0 from this function, the x86 TSC clock will not register
+ * itself. */
+static unsigned long lguest_cpu_khz(void)
+{
+	return lguest_data.tsc_khz;
+}
+
+/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
+ * we read the time value given to us by the Host. */
 static cycle_t lguest_clock_read(void)
 {
 	unsigned long sec, nsec;
 
-	/* If the Host tells the TSC speed, we can trust that. */
-	if (lguest_data.tsc_khz)
-		return native_read_tsc();
-
-	/* If we can't use the TSC, we read the time value written by the Host.
-	 * Since it's in two parts (seconds and nanoseconds), we risk reading
-	 * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
-	 * getting 99 and 0.  As Linux tends to come apart under the stress of
-	 * time travel, we must be careful: */
+	/* Since the time is in two parts (seconds and nanoseconds), we risk
+	 * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
+	 * and getting 99 and 0.  As Linux tends to come apart under the stress
+	 * of time travel, we must be careful: */
 	do {
 		/* First we read the seconds part. */
 		sec = lguest_data.time.tv_sec;
@@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void)
 		/* Now if the seconds part has changed, try again. */
 	} while (unlikely(lguest_data.time.tv_sec != sec));
 
-	/* Our non-TSC clock is in real nanoseconds. */
+	/* Our lguest clock is in real nanoseconds. */
 	return sec*1000000000ULL + nsec;
 }
 
-/* This is what we tell the kernel is our clocksource.  */
+/* This is the fallback clocksource: lower priority than the TSC clocksource. */
 static struct clocksource lguest_clock = {
 	.name		= "lguest",
-	.rating		= 400,
+	.rating		= 200,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
 	.mult		= 1 << 22,
@@ -637,12 +642,6 @@ static struct clocksource lguest_clock = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
-	return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
-
 /* We also need a "struct clock_event_device": Linux asks us to set it to go
  * off some time in the future.  Actually, James Morris figured all this out, I
  * just applied the patch. */
@@ -712,19 +711,8 @@ static void lguest_time_init(void)
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
 	set_irq_handler(0, lguest_time_irq);
 
-	/* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
-	 * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
-	 * Either way, the "rating" is set so high that it's always chosen over
-	 * any other clocksource. */
-	if (lguest_data.tsc_khz)
-		lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
-							 lguest_clock.shift);
-	clock_base = lguest_clock_read();
 	clocksource_register(&lguest_clock);
 
-	/* Now we've set up our clock, we can use it as the scheduler clock */
-	pv_time_ops.sched_clock = lguest_sched_clock;
-
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
 	lguest_clockevent.cpumask = cpumask_of_cpu(0);
@@ -995,6 +983,7 @@ __init void lguest_init(void)
 	/* time operations */
 	pv_time_ops.get_wallclock = lguest_get_wallclock;
 	pv_time_ops.time_init = lguest_time_init;
+	pv_time_ops.get_cpu_khz = lguest_cpu_khz;
 
 	/* Now is a good time to look at the implementations of these functions
 	 * before returning to the rest of lguest_init(). */
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
index 843b67acf43..bfac6ba10f8 100644
--- a/arch/x86/mach-visws/traps.c
+++ b/arch/x86/mach-visws/traps.c
@@ -46,8 +46,9 @@ static __init void cobalt_init(void)
 	 */
 	set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
 	setup_local_APIC();
-	printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n",
-		apic_read(APIC_LVR), apic_read(APIC_ID));
+	printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+		(unsigned int)apic_read(APIC_LVR),
+		(unsigned int)apic_read(APIC_ID));
 
 	set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
 	set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index c394ca0720b..8e25e06ff73 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void)
 	 * this space and use it to adjust the boundary between ZONE_NORMAL
 	 * and ZONE_HIGHMEM.
 	 */
-	find_max_pfn();
 	get_memcfg_numa();
 
 	kva_pages = calculate_numa_remap_pages();
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac3c959e271..794895c6dcc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -106,7 +106,7 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 			       enum ioremap_mode mode)
 {
 	unsigned long pfn, offset, last_addr, vaddr;
@@ -134,12 +134,14 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			return NULL;
 	}
 
-	WARN_ON_ONCE(page_is_ram(pfn));
-
 	switch (mode) {
 	case IOR_MODE_UNCACHED:
 	default:
-		prot = PAGE_KERNEL_NOCACHE;
+		/*
+		 * FIXME: we will use UC MINUS for now, as video fb drivers
+		 * depend on it. Upcoming ioremap_wc() will fix this behavior.
+		 */
+		prot = PAGE_KERNEL_UC_MINUS;
 		break;
 	case IOR_MODE_CACHED:
 		prot = PAGE_KERNEL;
@@ -195,13 +197,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
  *
  * Must be freed with iounmap.
  */
-void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 {
 	return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
-void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 {
 	return __ioremap(phys_addr, size, IOR_MODE_CACHED);
 }
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 59898fb0a4a..16b82ad34b9 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -221,8 +221,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 				 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
 	if (bootmap == NULL)  {
 		if (nodedata_phys < start || nodedata_phys >= end)
-			free_bootmem((unsigned long)node_data[nodeid],
-				     pgdat_size);
+			free_bootmem(nodedata_phys, pgdat_size);
 		node_data[nodeid] = NULL;
 		return;
 	}
@@ -622,13 +621,17 @@ void __init init_cpu_to_node(void)
 	int i;
 
 	for (i = 0; i < NR_CPUS; i++) {
+		int node;
 		u16 apicid = x86_cpu_to_apicid_init[i];
 
 		if (apicid == BAD_APICID)
 			continue;
-		if (apicid_to_node[apicid] == NUMA_NO_NODE)
+		node = apicid_to_node[apicid];
+		if (node == NUMA_NO_NODE)
 			continue;
-		numa_set_node(i, apicid_to_node[apicid]);
+		if (!node_online(node))
+			continue;
+		numa_set_node(i, node);
 	}
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7049294fb46..7b79f6be4e7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -26,7 +26,6 @@ struct cpa_data {
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
-	int		processed;
 	int		flushtlb;
 	unsigned long	pfn;
 };
@@ -291,8 +290,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 */
 	nextpage_addr = (address + psize) & pmask;
 	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
-	if (numpages < cpa->processed)
-		cpa->processed = numpages;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
 
 	/*
 	 * We are safe now. Check whether the new pgprot is the same:
@@ -319,7 +318,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 */
 	addr = address + PAGE_SIZE;
 	pfn++;
-	for (i = 1; i < cpa->processed; i++, addr += PAGE_SIZE, pfn++) {
+	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
 		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
 
 		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
@@ -343,7 +342,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->processed == numpages) {
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.
@@ -573,7 +572,7 @@ repeat:
 			set_pte_atomic(kpte, new_pte);
 			cpa->flushtlb = 1;
 		}
-		cpa->processed = 1;
+		cpa->numpages = 1;
 		return 0;
 	}
 
@@ -584,7 +583,7 @@ repeat:
 	do_split = try_preserve_large_page(kpte, address, cpa);
 	/*
 	 * When the range fits into the existing large page,
-	 * return. cp->processed and cpa->tlbflush have been updated in
+	 * return. cpa->numpages and cpa->flushtlb have been updated in
 	 * try_large_page:
 	 */
 	if (do_split <= 0)
@@ -663,7 +662,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * Store the remaining nr of pages for the large page
 		 * preservation check.
 		 */
-		cpa->numpages = cpa->processed = numpages;
+		cpa->numpages = numpages;
 
 		ret = __change_page_attr(cpa, checkalias);
 		if (ret)
@@ -680,9 +679,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * CPA operation. Either a large page has been
 		 * preserved or a single page update happened.
 		 */
-		BUG_ON(cpa->processed > numpages);
-		numpages -= cpa->processed;
-		cpa->vaddr += cpa->processed * PAGE_SIZE;
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		cpa->vaddr += cpa->numpages * PAGE_SIZE;
 	}
 	return 0;
 }
@@ -772,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
 int set_memory_uc(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_PCD | _PAGE_PWT));
+				    __pgprot(_PAGE_PCD));
 }
 EXPORT_SYMBOL(set_memory_uc);
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 73aba712520..2f9e9afcb9f 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 
-	mm->pgd = pgd;		/* so that alloc_pd can use it */
+	/* so that alloc_pd can use it */
+	mm->pgd = pgd;
+	if (pgd)
+		pgd_ctor(pgd);
 
 	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		quicklist_free(0, pgd_dtor, pgd);
+		pgd_dtor(pgd);
+		free_page((unsigned long)pgd);
 		pgd = NULL;
 	}
 
@@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	pgd_mop_up_pmds(mm, pgd);
-	quicklist_free(0, pgd_dtor, pgd);
-}
-
-void check_pgt_cache(void)
-{
-	quicklist_trim(0, pgd_dtor, 25, 16);
+	pgd_dtor(pgd);
+	free_page((unsigned long)pgd);
 }
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 10ac8c316c4..2f7109ac4c1 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -198,6 +198,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
 			  "b" (bx),
 			  "D" ((long)reg),
 			  "S" (&pci_indirect));
+		/*
+		 * Zero-extend the result beyond 8 bits, do not trust the
+		 * BIOS having done it:
+		 */
+		*value &= 0xff;
 		break;
 	case 2:
 		__asm__("lcall *(%%esi); cld\n\t"
@@ -210,6 +215,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
 			  "b" (bx),
 			  "D" ((long)reg),
 			  "S" (&pci_indirect));
+		/*
+		 * Zero-extend the result beyond 16 bits, do not trust the
+		 * BIOS having done it:
+		 */
+		*value &= 0xffff;
 		break;
 	case 4:
 		__asm__("lcall *(%%esi); cld\n\t"
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3bad4773a2f..2341492bf7a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -38,7 +38,8 @@ char * __init xen_memory_setup(void)
 	unsigned long max_pfn = xen_start_info->nr_pages;
 
 	e820.nr_map = 0;
-	add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+	add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
 
 	return "Xen";
 }