28 files changed, 469 insertions, 318 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 842f0d1ab21..f2c0a684293 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -557,6 +557,9 @@ Set some flags:
 Add some cpus:
 # /bin/echo 0-7 > cpus
 
+Add some mems:
+# /bin/echo 0-7 > mems
+
 Now attach your shell to this cpuset:
 # /bin/echo $$ > tasks
 
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index 2c5b5cc55f7..8143c9516cb 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -571,6 +571,16 @@ setr1:	lodsw
 	jmp	_m_s
 
 check_vesa:
+#ifdef CONFIG_FIRMWARE_EDID
+	leaw	modelist+1024, %di
+	movw	$0x4f00, %ax
+	int	$0x10
+	cmpw	$0x004f, %ax
+	jnz	setbad
+
+	movw	4(%di), %ax
+	movw	%ax, vbe_version
+#endif
 	leaw	modelist+1024, %di
 	subb	$VIDEO_FIRST_VESA>>8, %bh
 	movw	%bx, %cx			# Get mode information structure
@@ -1945,6 +1955,9 @@ store_edid:
 	rep
 	stosl
 
+	cmpw	$0x0200, vbe_version		# only do EDID on >= VBE2.0
+	jl	no_edid
+
 	pushw   %es				# save ES
 	xorw    %di, %di                        # Report Capability
 	pushw   %di
@@ -1987,6 +2000,7 @@ do_restore:	.byte	0	# Screen contents altered during mode change
 svga_prefix:	.byte	VIDEO_FIRST_BIOS>>8	# Default prefix for BIOS modes
 graphic_mode:	.byte	0	# Graphic mode with a linear frame buffer
 dac_size:	.byte	6	# DAC bit depth
+vbe_version:	.word	0	# VBE bios version
 
 # Status messages
 keymsg:		.ascii	"Press <RETURN> to see video modes available, "
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index e88415282a6..93aa911646a 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -272,32 +272,6 @@ static void __devinit setup_APIC_timer(void)
 }
 
 /*
- * Detect systems with known broken BIOS implementations
- */
-static int __init lapic_check_broken_bios(struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE "%s detected: disabling lapic timer.\n",
-		       d->ident);
-	local_apic_timer_disabled = 1;
-	return 0;
-}
-
-static struct dmi_system_id __initdata broken_bios_dmi_table[] = {
-	{
-		/*
-		 * BIOS exports only C1 state, but uses deeper power
-		 * modes behind the kernels back.
-		 */
-		  .callback = lapic_check_broken_bios,
-		  .ident = "HP nx6325",
-		  .matches = {
-			DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
-		  },
-	 },
-	 {}
-};
-
-/*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
  * We want to do the calibration only once since we want to have local timer
@@ -372,12 +346,12 @@ void __init setup_boot_APIC_clock(void)
 	long delta, deltapm;
 	int pm_referenced = 0;
 
-	/* Detect know broken systems */
-	dmi_check_system(broken_bios_dmi_table);
+	if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
+		local_apic_timer_disabled = 1;
 
 	/*
 	 * The local apic timer can be disabled via the kernel
-	 * commandline or from the dmi quirk above. Register the lapic
+	 * commandline or from the test above. Register the lapic
 	 * timer as a dummy clock event source on SMP systems, so the
 	 * broadcast mechanism is used. On UP systems simply ignore it.
 	 */
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 41cfea57232..2d47db48297 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -22,6 +22,37 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
+#define ENABLE_C1E_MASK         0x18000000
+#define CPUID_PROCESSOR_SIGNATURE       1
+#define CPUID_XFAM              0x0ff00000
+#define CPUID_XFAM_K8           0x00000000
+#define CPUID_XFAM_10H          0x00100000
+#define CPUID_XFAM_11H          0x00200000
+#define CPUID_XMOD              0x000f0000
+#define CPUID_XMOD_REV_F        0x00040000
+
+/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
+static __cpuinit int amd_apic_timer_broken(void)
+{
+	u32 lo, hi;
+	u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	switch (eax & CPUID_XFAM) {
+	case CPUID_XFAM_K8:
+		if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
+			break;
+	case CPUID_XFAM_10H:
+	case CPUID_XFAM_11H:
+		rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
+		if (lo & ENABLE_C1E_MASK)
+			return 1;
+                break;
+        default:
+                /* err on the side of caution */
+		return 1;
+        }
+	return 0;
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -241,6 +272,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	if (cpuid_eax(0x80000000) >= 0x80000006)
 		num_cache_leaves = 3;
+
+	if (amd_apic_timer_broken())
+		set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index b8f16633a6e..cbe7ec8dbb9 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu)
 	return error;
 }
 
+static int apply_microcode_on_cpu(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	cpumask_t old;
+	unsigned int val[2];
+	int err = 0;
+
+	if (!uci->mc)
+		return -EINVAL;
+
+	old = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+	/* Check if the microcode we have in memory matches the CPU */
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
+		err = -EINVAL;
+
+	if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
+		/* get processor flags from MSR 0x17 */
+		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		if (uci->pf != (1 << ((val[1] >> 18) & 7)))
+			err = -EINVAL;
+	}
+
+	if (!err) {
+		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+		/* see notes above for revision 1.07.  Apparent chip bug */
+		sync_core();
+		/* get the current revision from MSR 0x8B */
+		rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+		if (uci->rev != val[1])
+			err = -EINVAL;
+	}
+
+	if (!err)
+		apply_microcode(cpu);
+	else
+		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
+			" sig=0x%x, pf=0x%x, rev=0x%x\n",
+			cpu, uci->sig, uci->pf, uci->rev);
+
+	set_cpus_allowed(current, old);
+	return err;
+}
+
 static void microcode_init_cpu(int cpu)
 {
 	cpumask_t old;
@@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu)
 	set_cpus_allowed(current, cpumask_of_cpu(cpu));
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
-	if (uci->valid && system_state == SYSTEM_RUNNING)
+	if (uci->valid && system_state == SYSTEM_RUNNING &&
+	    !suspend_cpu_hotplug)
 		cpu_request_microcode(cpu);
 	mutex_unlock(&microcode_mutex);
 	set_cpus_allowed(current, old);
@@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
 		return 0;
 
 	pr_debug("Microcode:CPU %d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
+	/* If suspend_cpu_hotplug is set, the system is resuming and we should
+	 * use the data from before the suspend.
+	 */
+	if (suspend_cpu_hotplug) {
+		err = apply_microcode_on_cpu(cpu);
+		if (err)
+			microcode_fini_cpu(cpu);
+	}
+	if (!uci->valid)
+		memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	microcode_init_cpu(cpu);
+	if (!uci->valid)
+		microcode_init_cpu(cpu);
+
 	return 0;
 }
 
@@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 	if (!cpu_online(cpu))
 		return 0;
 	pr_debug("Microcode:CPU %d removed\n", cpu);
-	microcode_fini_cpu(cpu);
+	/* If suspend_cpu_hotplug is set, the system is suspending and we should
+	 * keep the microcode in memory for the resume.
+	 */
+	if (!suspend_cpu_hotplug)
+		microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
 }
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 14702427b10..a98ba88a8c0 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -122,64 +122,129 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 /* checks for a bit availability (hack for oprofile) */
 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
+	int cpu;
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+			return 0;
+	}
+	return 1;
 }
 
 /* checks the an msr for availability */
 int avail_to_resrv_perfctr_nmi(unsigned int msr)
 {
 	unsigned int counter;
+	int cpu;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+			return 0;
+	}
+	return 1;
 }
 
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
 		return 1;
 	return 0;
 }
 
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
 }
 
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_perfctr_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_perfctr_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_perfctr_nmi(cpu, msr);
+	}
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_evntsel_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_evntsel_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_evntsel_nmi(cpu, msr);
+	}
 }
 
 static __cpuinit inline int nmi_known_cpu(void)
@@ -263,7 +328,7 @@ static int __init check_nmi_watchdog(void)
 	for_each_possible_cpu(cpu)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
-	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
 	for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
@@ -507,10 +572,10 @@ static int setup_k7_watchdog(void)
 
 	perfctr_msr = MSR_K7_PERFCTR0;
 	evntsel_msr = MSR_K7_EVNTSEL0;
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -533,7 +598,7 @@ static int setup_k7_watchdog(void)
 	wd->check_bit = 1ULL<<63;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -544,8 +609,8 @@ static void stop_k7_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define P6_EVNTSEL0_ENABLE	(1 << 22)
@@ -563,10 +628,10 @@ static int setup_p6_watchdog(void)
 
 	perfctr_msr = MSR_P6_PERFCTR0;
 	evntsel_msr = MSR_P6_EVNTSEL0;
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -590,7 +655,7 @@ static int setup_p6_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -601,8 +666,8 @@ static void stop_p6_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 /* Note that these events don't tick when the CPU idles. This means
@@ -668,10 +733,10 @@ static int setup_p4_watchdog(void)
 		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -695,7 +760,7 @@ static int setup_p4_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -707,8 +772,8 @@ static void stop_p4_watchdog(void)
 	wrmsr(wd->cccr_msr, 0, 0);
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -736,10 +801,10 @@ static int setup_intel_arch_watchdog(void)
 	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -764,7 +829,7 @@ static int setup_intel_arch_watchdog(void)
 	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -787,8 +852,8 @@ static void stop_intel_arch_watchdog(void)
 		return;
 
 	wrmsr(wd->evntsel_msr, 0, 0);
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 void setup_apic_nmi_watchdog (void *unused)
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index d22cfc9d656..086b3726862 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -10,6 +10,7 @@
 #include <linux/blkdev.h>
 #include <linux/module.h>
 #include <linux/backing-dev.h>
+#include <linux/interrupt.h>
 #include <asm/uaccess.h>
 #include <asm/mmx.h>
 
@@ -719,6 +720,14 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
 #ifndef CONFIG_X86_WP_WORKS_OK
 	if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
 			((unsigned long )to) < TASK_SIZE) {
+		/*
+		 * When we are in an atomic section (see
+		 * mm/filemap.c:file_read_actor), return the full
+		 * length to take the slow path.
+		 */
+		if (in_atomic())
+			return n;
+
 		/* 
 		 * CPU does not honor the WP bit when writing
 		 * from supervisory mode, and due to preemption or SMP,
diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c
index 2c11b9770e8..d623e074f41 100644
--- a/arch/um/sys-i386/delay.c
+++ b/arch/um/sys-i386/delay.c
@@ -27,14 +27,3 @@ void __udelay(unsigned long usecs)
 }
 
 EXPORT_SYMBOL(__udelay);
-
-void __const_udelay(unsigned long usecs)
-{
-	int i, n;
-
-	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-        for(i=0;i<n;i++)
-                cpu_relax();
-}
-
-EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c
index 137f4446b43..dee5be66da8 100644
--- a/arch/um/sys-x86_64/delay.c
+++ b/arch/um/sys-x86_64/delay.c
@@ -28,14 +28,3 @@ void __udelay(unsigned long usecs)
 }
 
 EXPORT_SYMBOL(__udelay);
-
-void __const_udelay(unsigned long usecs)
-{
-	unsigned long i, n;
-
-	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-        for(i=0;i<n;i++)
-                cpu_relax();
-}
-
-EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
index d6ff88f3513..6090516c9c7 100644
--- a/arch/x86_64/boot/video.S
+++ b/arch/x86_64/boot/video.S
@@ -571,6 +571,16 @@ setr1:	lodsw
 	jmp	_m_s
 
 check_vesa:
+#ifdef CONFIG_FIRMWARE_EDID
+	leaw	modelist+1024, %di
+	movw	$0x4f00, %ax
+	int	$0x10
+	cmpw	$0x004f, %ax
+	jnz	setbad
+
+	movw	4(%di), %ax
+	movw	%ax, vbe_version
+#endif
 	leaw	modelist+1024, %di
 	subb	$VIDEO_FIRST_VESA>>8, %bh
 	movw	%bx, %cx			# Get mode information structure
@@ -1945,6 +1955,9 @@ store_edid:
 	rep
 	stosl
 
+	cmpw	$0x0200, vbe_version		# only do EDID on >= VBE2.0
+	jl	no_edid
+
 	pushw   %es				# save ES
 	xorw    %di, %di                        # Report Capability
 	pushw   %di
@@ -1987,6 +2000,7 @@ do_restore:	.byte	0	# Screen contents altered during mode change
 svga_prefix:	.byte	VIDEO_FIRST_BIOS>>8	# Default prefix for BIOS modes
 graphic_mode:	.byte	0	# Graphic mode with a linear frame buffer
 dac_size:	.byte	6	# DAC bit depth
+vbe_version:	.word	0	# VBE bios version
 
 # Status messages
 keymsg:		.ascii	"Press <RETURN> to see video modes available, "
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 82d9d85d527..a90996c27dc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -108,64 +108,128 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 /* checks for a bit availability (hack for oprofile) */
 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
+	int cpu;
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+			return 0;
+	}
+	return 1;
 }
 
 /* checks the an msr for availability */
 int avail_to_resrv_perfctr_nmi(unsigned int msr)
 {
 	unsigned int counter;
+	int cpu;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+			return 0;
+	}
+	return 1;
 }
 
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
 		return 1;
 	return 0;
 }
 
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
 }
 
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_perfctr_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_perfctr_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu)
+		__release_perfctr_nmi(cpu, msr);
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
+	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
+	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_evntsel_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_evntsel_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_evntsel_nmi(cpu, msr);
+	}
 }
 
 static __cpuinit inline int nmi_known_cpu(void)
@@ -253,7 +317,7 @@ int __init check_nmi_watchdog (void)
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		counts[cpu] = cpu_pda(cpu)->__nmi_count;
 	local_irq_enable();
-	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
 	for_each_online_cpu(cpu) {
 		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
@@ -472,10 +536,10 @@ static int setup_k7_watchdog(void)
 
 	perfctr_msr = MSR_K7_PERFCTR0;
 	evntsel_msr = MSR_K7_EVNTSEL0;
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	/* Simulator may not support it */
@@ -501,9 +565,9 @@ static int setup_k7_watchdog(void)
 	wd->check_bit = 1ULL<<63;
 	return 1;
 fail2:
-	release_evntsel_nmi(evntsel_msr);
+	__release_evntsel_nmi(-1, evntsel_msr);
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -514,8 +578,8 @@ static void stop_k7_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 /* Note that these events don't tick when the CPU idles. This means
@@ -581,10 +645,10 @@ static int setup_p4_watchdog(void)
 		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -609,7 +673,7 @@ static int setup_p4_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -621,8 +685,8 @@ static void stop_p4_watchdog(void)
 	wrmsr(wd->cccr_msr, 0, 0);
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -650,10 +714,10 @@ static int setup_intel_arch_watchdog(void)
 	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -680,7 +744,7 @@ static int setup_intel_arch_watchdog(void)
 	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -704,8 +768,8 @@ static void stop_intel_arch_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 void setup_apic_nmi_watchdog(void *unused)
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 1fa2da8f4fb..c9f2dd620e8 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -1039,10 +1039,22 @@ int vt_waitactive(int vt)
 
 	add_wait_queue(&vt_activate_queue, &wait);
 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		retval = 0;
-		if (vt == fg_console)
+
+		/*
+		 * Synchronize with redraw_screen(). By acquiring the console
+		 * semaphore we make sure that the console switch is completed
+		 * before we return. If we didn't wait for the semaphore, we
+		 * could return at a point where fg_console has already been
+		 * updated, but the console switch hasn't been completed.
+		 */
+		acquire_console_sem();
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (vt == fg_console) {
+			release_console_sem();
 			break;
+		}
+		release_console_sem();
 		retval = -EINTR;
 		if (signal_pending(current))
 			break;
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index c707c8ebc1a..b0b4458ae90 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -319,7 +319,7 @@ int sm501_unit_power(struct device *dev, unsigned int unit, unsigned int to)
 
 	mode &= 3;		/* get current power mode */
 
-	if (unit > ARRAY_SIZE(sm->unit_power)) {
+	if (unit >= ARRAY_SIZE(sm->unit_power)) {
 		dev_err(dev, "%s: bad unit %d\n", __FUNCTION__, unit);
 		goto already;
 	}
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c
index 2065e74bb63..a8a95540b1e 100644
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -22,7 +22,7 @@ static const struct pnp_device_id pnp_dev_table[] = {
 	{	"",			0	}
 };
 
-static void reserve_range(char *pnpid, int start, int end, int port)
+static void reserve_range(const char *pnpid, resource_size_t start, resource_size_t end, int port)
 {
 	struct resource *res;
 	char *regionid;
@@ -32,9 +32,9 @@ static void reserve_range(char *pnpid, int start, int end, int port)
 		return;
 	snprintf(regionid, 16, "pnp %s", pnpid);
 	if (port)
-		res = request_region(start,end-start+1,regionid);
+		res = request_region(start, end-start+1, regionid);
 	else
-		res = request_mem_region(start,end-start+1,regionid);
+		res = request_mem_region(start, end-start+1, regionid);
 	if (res == NULL)
 		kfree(regionid);
 	else
@@ -45,12 +45,13 @@ static void reserve_range(char *pnpid, int start, int end, int port)
 	 * have double reservations.
 	 */
 	printk(KERN_INFO
-		"pnp: %s: %s range 0x%x-0x%x %s reserved\n",
-		pnpid, port ? "ioport" : "iomem", start, end,
+		"pnp: %s: %s range 0x%llx-0x%llx %s reserved\n",
+		pnpid, port ? "ioport" : "iomem",
+                (unsigned long long)start, (unsigned long long)end,
 		NULL != res ? "has been" : "could not be");
 }
 
-static void reserve_resources_of_dev(struct pnp_dev *dev)
+static void reserve_resources_of_dev(const struct pnp_dev *dev)
 {
 	int i;
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 85bf795abdc..7c0d6091007 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -59,6 +59,19 @@ struct cmos_rtc {
 
 static const char driver_name[] = "rtc_cmos";
 
+/* The RTC_INTR register may have e.g. RTC_PF set even if RTC_PIE is clear;
+ * always mask it against the irq enable bits in RTC_CONTROL.  Bit values
+ * are the same: PF==PIE, AF=AIE, UF=UIE; so RTC_IRQMASK works with both.
+ */
+#define	RTC_IRQMASK	(RTC_PF | RTC_AF | RTC_UF)
+
+static inline int is_intr(u8 rtc_intr)
+{
+	if (!(rtc_intr & RTC_IRQF))
+		return 0;
+	return rtc_intr & RTC_IRQMASK;
+}
+
 /*----------------------------------------------------------------*/
 
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
@@ -188,7 +201,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	rtc_control &= ~RTC_AIE;
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
 	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-	if (rtc_intr)
+	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+	if (is_intr(rtc_intr))
 		rtc_update_irq(&cmos->rtc->class_dev, 1, rtc_intr);
 
 	/* update alarm */
@@ -207,7 +221,8 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		rtc_control |= RTC_AIE;
 		CMOS_WRITE(rtc_control, RTC_CONTROL);
 		rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-		if (rtc_intr)
+		rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+		if (is_intr(rtc_intr))
 			rtc_update_irq(&cmos->rtc->class_dev, 1, rtc_intr);
 	}
 
@@ -287,7 +302,8 @@ cmos_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 	}
 	CMOS_WRITE(rtc_control, RTC_CONTROL);
 	rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-	if (rtc_intr)
+	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+	if (is_intr(rtc_intr))
 		rtc_update_irq(&cmos->rtc->class_dev, 1, rtc_intr);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return 0;
@@ -353,12 +369,10 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 
 	spin_lock(&rtc_lock);
 	irqstat = CMOS_READ(RTC_INTR_FLAGS);
+	irqstat &= (CMOS_READ(RTC_CONTROL) & RTC_IRQMASK) | RTC_IRQF;
 	spin_unlock(&rtc_lock);
 
-	if (irqstat) {
-		/* NOTE: irqstat may have e.g. RTC_PF set
-		 * even when RTC_PIE is clear...
-		 */
+	if (is_intr(irqstat)) {
 		rtc_update_irq(p, 1, irqstat);
 		return IRQ_HANDLED;
 	} else
@@ -525,25 +539,26 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
 	int		do_wake = device_may_wakeup(dev);
-	unsigned char	tmp, irqstat;
+	unsigned char	tmp;
 
 	/* only the alarm might be a wakeup event source */
 	spin_lock_irq(&rtc_lock);
 	cmos->suspend_ctrl = tmp = CMOS_READ(RTC_CONTROL);
 	if (tmp & (RTC_PIE|RTC_AIE|RTC_UIE)) {
+		unsigned char	irqstat;
+
 		if (do_wake)
 			tmp &= ~(RTC_PIE|RTC_UIE);
 		else
 			tmp &= ~(RTC_PIE|RTC_AIE|RTC_UIE);
 		CMOS_WRITE(tmp, RTC_CONTROL);
 		irqstat = CMOS_READ(RTC_INTR_FLAGS);
-	} else
-		irqstat = 0;
+		irqstat &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+		if (is_intr(irqstat))
+			rtc_update_irq(&cmos->rtc->class_dev, 1, irqstat);
+	}
 	spin_unlock_irq(&rtc_lock);
 
-	if (irqstat)
-		rtc_update_irq(&cmos->rtc->class_dev, 1, irqstat);
-
 	/* ACPI HOOK:  enable ACPI_EVENT_RTC when (tmp & RTC_AIE)
 	 * ... it'd be best if we could do that under rtc_lock.
 	 */
@@ -573,9 +588,10 @@ static int cmos_resume(struct device *dev)
 		spin_lock_irq(&rtc_lock);
 		CMOS_WRITE(tmp, RTC_CONTROL);
 		tmp = CMOS_READ(RTC_INTR_FLAGS);
-		spin_unlock_irq(&rtc_lock);
-		if (tmp)
+		tmp &= (cmos->suspend_ctrl & RTC_IRQMASK) | RTC_IRQF;
+		if (is_intr(tmp))
 			rtc_update_irq(&cmos->rtc->class_dev, 1, tmp);
+		spin_unlock_irq(&rtc_lock);
 	}
 
 	pr_debug("%s: resume, ctrl %02x\n",
@@ -594,7 +610,7 @@ static int cmos_resume(struct device *dev)
 /*----------------------------------------------------------------*/
 
 /* The "CMOS" RTC normally lives on the platform_bus.  On ACPI systems,
- * the device node may alternatively be created as a PNP device.
+ * the device node will always be created as a PNPACPI device.
  */
 
 #ifdef	CONFIG_PNPACPI
@@ -673,7 +689,7 @@ module_exit(cmos_exit);
 /*----------------------------------------------------------------*/
 
 /* Platform setup should have set up an RTC device, when PNPACPI is
- * unavailable ... this is the normal case, common even on PCs.
+ * unavailable ... this could happen even on (older) PCs.
  */
 
 static int __init cmos_platform_probe(struct platform_device *pdev)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a2fceba7ef8..9cc4f0a8aaa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1704,7 +1704,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 				DUMP_SEEK(PAGE_SIZE);
 			} else {
 				if (page == ZERO_PAGE(addr)) {
-					DUMP_SEEK(PAGE_SIZE);
+					if (!dump_seek(file, PAGE_SIZE)) {
+						page_cache_release(page);
+						goto end_coredump;
+					}
 				} else {
 					void *kaddr;
 					flush_cache_page(vma, addr,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 47d6d49d1fb..f3ddca4a387 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1480,8 +1480,8 @@ static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
 				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
 			else if (page == ZERO_PAGE(addr)) {
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 				page_cache_release(page);
+				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
 			else {
 				void *kaddr;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8a824f4ce5c..a5b150f7e8a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1148,102 +1148,37 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext3_prepare_failure(struct file *file, struct page *page,
-				unsigned from, unsigned to)
-{
-	struct address_space *mapping;
-	struct buffer_head *bh, *head, *next;
-	unsigned block_start, block_end;
-	unsigned blocksize;
-	int ret;
-	handle_t *handle = ext3_journal_current_handle();
-
-	mapping = page->mapping;
-	if (ext3_should_writeback_data(mapping->host)) {
-		/* optimization: no constraints about data */
-skip:
-		return ext3_journal_stop(handle);
-	}
-
-	head = page_buffers(page);
-	blocksize = head->b_size;
-	for (	bh = head, block_start = 0;
-		bh != head || !block_start;
-	    	block_start = block_end, bh = next)
-	{
-		next = bh->b_this_page;
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to) {
-			block_start = to;
-			break;
-		}
-		if (!buffer_mapped(bh))
-		/* prepare_write failed on this bh */
-			break;
-		if (ext3_should_journal_data(mapping->host)) {
-			ret = do_journal_get_write_access(handle, bh);
-			if (ret) {
-				ext3_journal_stop(handle);
-				return ret;
-			}
-		}
-	/*
-	 * block_start here becomes the first block where the current iteration
-	 * of prepare_write failed.
-	 */
-	}
-	if (block_start <= from)
-		goto skip;
-
-	/* commit allocated and zeroed buffers */
-	return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext3_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, ret2;
-	int needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
 	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
-		goto failure;
+		goto prepare_write_failed;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
-		if (ret)
-			/* fatal error, just put the handle and return */
-			journal_stop(handle);
 	}
-	return ret;
-
-failure:
-	ret2 = ext3_prepare_failure(file, page, from, to);
-	if (ret2 < 0)
-		return ret2;
+prepare_write_failed:
+	if (ret)
+		ext3_journal_stop(handle);
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-	/* retry number exceeded, or other error like -EDQUOT */
+out:
 	return ret;
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fbff4b9e122..810b6d6474b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,102 +1147,37 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext4_prepare_failure(struct file *file, struct page *page,
-				unsigned from, unsigned to)
-{
-	struct address_space *mapping;
-	struct buffer_head *bh, *head, *next;
-	unsigned block_start, block_end;
-	unsigned blocksize;
-	int ret;
-	handle_t *handle = ext4_journal_current_handle();
-
-	mapping = page->mapping;
-	if (ext4_should_writeback_data(mapping->host)) {
-		/* optimization: no constraints about data */
-skip:
-		return ext4_journal_stop(handle);
-	}
-
-	head = page_buffers(page);
-	blocksize = head->b_size;
-	for (	bh = head, block_start = 0;
-		bh != head || !block_start;
-	    	block_start = block_end, bh = next)
-	{
-		next = bh->b_this_page;
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to) {
-			block_start = to;
-			break;
-		}
-		if (!buffer_mapped(bh))
-		/* prepare_write failed on this bh */
-			break;
-		if (ext4_should_journal_data(mapping->host)) {
-			ret = do_journal_get_write_access(handle, bh);
-			if (ret) {
-				ext4_journal_stop(handle);
-				return ret;
-			}
-		}
-	/*
-	 * block_start here becomes the first block where the current iteration
-	 * of prepare_write failed.
-	 */
-	}
-	if (block_start <= from)
-		goto skip;
-
-	/* commit allocated and zeroed buffers */
-	return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext4_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, ret2;
-	int needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext4_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext4_get_block);
 	if (ret)
-		goto failure;
+		goto prepare_write_failed;
 
 	if (ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
-		if (ret)
-			/* fatal error, just put the handle and return */
-			ext4_journal_stop(handle);
 	}
-	return ret;
-
-failure:
-	ret2 = ext4_prepare_failure(file, page, from, to);
-	if (ret2 < 0)
-		return ret2;
+prepare_write_failed:
+	if (ret)
+		ext4_journal_stop(handle);
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-	/* retry number exceeded, or other error like -EDQUOT */
+out:
 	return ret;
 }
 
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c932aa65e19..f771889183c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,7 +11,11 @@
 
 #include <linux/proc_fs.h>
 
+#ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
+#else
+static inline void proc_sys_init(void) { }
+#endif
 
 struct vmalloc_info {
 	unsigned long	used;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5834a744c2a..41f17037f73 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -79,9 +79,7 @@ void __init proc_root_init(void)
 	proc_device_tree_init();
 #endif
 	proc_bus = proc_mkdir("bus", NULL);
-#ifdef CONFIG_SYSCTL
 	proc_sys_init();
-#endif
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index 3f92b94e0d7..d1b8e4ab6c1 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -75,6 +75,7 @@
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
+#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index ec3b6803fd3..2ad3f30b1a6 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -275,6 +275,8 @@ static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 #define MSR_K7_FID_VID_CTL		0xC0010041
 #define MSR_K7_FID_VID_STATUS		0xC0010042
 
+#define MSR_K8_ENABLE_C1E		0xC0010055
+
 /* extended feature register */
 #define MSR_EFER 			0xc0000080
 
diff --git a/include/asm-um/delay.h b/include/asm-um/delay.h
index 0985bda6675..c71e32b6741 100644
--- a/include/asm-um/delay.h
+++ b/include/asm-um/delay.h
@@ -1,9 +1,20 @@
 #ifndef __UM_DELAY_H
 #define __UM_DELAY_H
 
-#include "asm/arch/delay.h"
-#include "asm/archparam.h"
-
 #define MILLION 1000000
 
+/* Undefined on purpose */
+extern void __bad_udelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) ((__builtin_constant_p(n) && (n) > 20000) ? \
+	__bad_udelay() : __udelay(n))
+
+/* It appears that ndelay is not used at all for UML, and has never been
+ * implemented. */
+extern void __unimplemented_ndelay(void);
+#define ndelay(n) __unimplemented_ndelay()
+
 #endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 769ddc6df49..c22b0dfcbcd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -127,9 +127,13 @@ static inline int cpu_is_offline(int cpu) { return 0; }
 #endif		/* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_SUSPEND_SMP
+extern int suspend_cpu_hotplug;
+
 extern int disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
+#define suspend_cpu_hotplug	0
+
 static inline int disable_nonboot_cpus(void) { return 0; }
 static inline void enable_nonboot_cpus(void) {}
 #endif
diff --git a/include/linux/device.h b/include/linux/device.h
index caad9bba965..5cf30e95c8b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -128,6 +128,7 @@ struct device_driver {
 
 	struct module		* owner;
 	const char 		* mod_name;	/* used for built-in modules */
+	struct module_kobject	* mkobj;
 
 	int	(*probe)	(struct device * dev);
 	int	(*remove)	(struct device * dev);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3d4206ada5c..36e70845cfc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu)
 }
 
 #ifdef CONFIG_SUSPEND_SMP
+/* Needed to prevent the microcode driver from requesting firmware in its CPU
+ * hotplug notifier during the suspend/resume.
+ */
+int suspend_cpu_hotplug;
+EXPORT_SYMBOL(suspend_cpu_hotplug);
+
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -261,16 +267,8 @@ int disable_nonboot_cpus(void)
 	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
-	first_cpu = first_cpu(cpu_present_map);
-	if (!cpu_online(first_cpu)) {
-		error = _cpu_up(first_cpu);
-		if (error) {
-			printk(KERN_ERR "Could not bring CPU%d up.\n",
-				first_cpu);
-			goto out;
-		}
-	}
-
+	suspend_cpu_hotplug = 1;
+	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
@@ -296,7 +294,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-out:
+	suspend_cpu_hotplug = 0;
 	mutex_unlock(&cpu_add_remove_lock);
 	return error;
 }
@@ -308,20 +306,22 @@ void enable_nonboot_cpus(void)
 	/* Allow everyone to use the CPU hotplug again */
 	mutex_lock(&cpu_add_remove_lock);
 	cpu_hotplug_disabled = 0;
-	mutex_unlock(&cpu_add_remove_lock);
 	if (cpus_empty(frozen_cpus))
-		return;
+		goto out;
 
+	suspend_cpu_hotplug = 1;
 	printk("Enabling non-boot CPUs ...\n");
 	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
+		error = _cpu_up(cpu);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
 			continue;
 		}
-		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
-			cpu, error);
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 	}
 	cpus_clear(frozen_cpus);
+	suspend_cpu_hotplug = 0;
+out:
+	mutex_unlock(&cpu_add_remove_lock);
 }
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index fbc51de6444..dcdb32b8b13 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2384,8 +2384,13 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 
 		/* Lookup built-in module entry in /sys/modules */
 		mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
-		if (mkobj)
+		if (mkobj) {
 			mk = container_of(mkobj, struct module_kobject, kobj);
+			/* remember our module structure */
+			drv->mkobj = mk;
+			/* kset_find_obj took a reference */
+			kobject_put(mkobj);
+		}
 	}
 
 	if (!mk)
@@ -2405,17 +2410,22 @@ EXPORT_SYMBOL(module_add_driver);
 
 void module_remove_driver(struct device_driver *drv)
 {
+	struct module_kobject *mk = NULL;
 	char *driver_name;
 
 	if (!drv)
 		return;
 
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->mkobj.drivers_dir) {
+
+	if (drv->owner)
+		mk = &drv->owner->mkobj;
+	else if (drv->mkobj)
+		mk = drv->mkobj;
+	if (mk && mk->drivers_dir) {
 		driver_name = make_driver_name(drv);
 		if (driver_name) {
-			sysfs_remove_link(drv->owner->mkobj.drivers_dir,
-					  driver_name);
+			sysfs_remove_link(mk->drivers_dir, driver_name);
 			kfree(driver_name);
 		}
 	}