From 8965eb19386fdf5ccd0ef8b02593eb8560aa3416 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 12 May 2008 15:43:30 +0200
Subject: x86/pci: fix broken ISA DMA

Rene Herman reported:

> commit 8779f2fc3b84ebb6c5181fb13d702e9944c16069
>
> "x86: don't try to allocate from DMA zone at first"
>
> breaks all of ISA DMA. Or all of ALSA ISA DMA at least. All
> ISA soundcards are silent following that commit -- no error
> messages, everything appears fine, just silence.

That patch is buggy. We had an implicit assumption that
dev = NULL for ISA devices that require 24bit DMA.

The recent work on x86 dma_alloc_coherent() breaks the ISA DMA buffer
allocation, which is represented by "dev = NULL" and requires 24bit
DMA implicitly.

Bisected-by: Rene Herman <rene.herman@keyaccess.nl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Rene Herman <rene.herman@keyaccess.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 0c37f16b695..c5ef1af8e79 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -385,11 +385,13 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
 		return memory;
 
-	if (!dev)
+	if (!dev) {
 		dev = &fallback_dev;
+		gfp |= GFP_DMA;
+	}
 	dma_mask = dev->coherent_dma_mask;
 	if (dma_mask == 0)
-		dma_mask = DMA_32BIT_MASK;
+		dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
 
 	/* Device not DMA able */
 	if (dev->dma_mask == NULL)
@@ -403,7 +405,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	   larger than 16MB and in this case we have a chance of
 	   finding fitting memory in the next higher zone first. If
 	   not retry with true GFP_DMA. -AK */
-	if (dma_mask <= DMA_32BIT_MASK)
+	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
 		gfp |= GFP_DMA32;
 #endif
 
-- 
cgit v1.2.3


From f8955ebe3ea85a9d3ff2685ee64386fd34434cf3 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sat, 10 May 2008 09:01:48 -0500
Subject: x86: [VOYAGER] fix duplicate phys_cpu_present_map symbol

The phys_cpu_present_map is an expected symbol in the SMP harness.
Unfortunately, x86 recently moved this and a few others to
kernel/setup.c where it doesn't quite work because voyager has to
define its own.  Use CONFIG_X86_LOCAL_APIC to isolate these
definitions and fix up another area in setup.c where CONFIG_X86_SMP
should be used instead of CONFIG_SMP.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: toralf.foerster@gmx.de
Cc: Mike Travis <travis@sgi.com>
Cc: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c0c68c18a78..6f80b852a19 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -12,6 +12,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 
+#ifdef CONFIG_X86_LOCAL_APIC
 unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
 /* Processor that is doing the boot up */
@@ -23,8 +24,9 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
+#endif
 
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
 /*
  * Copy data used in early init routines from the initial arrays to the
  * per cpu data areas.  These arrays then become expendable and the
-- 
cgit v1.2.3


From 8c6b0ef2ea1bb42cd72d987389297f66cd25790b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 11 May 2008 22:46:38 +0400
Subject: x86: wakeup.lds.S - section ordering fix

To allow linker to catch sections overlapping we have to declare
them in appropriate order.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/realmode/wakeup.lds.S | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 22fab6c4be1..7da00b799cd 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -12,11 +12,6 @@ ENTRY(_start)
 
 SECTIONS
 {
-	. = HEADER_OFFSET;
-	.header : {
-		 *(.header)
-	}
-
 	. = 0;
 	.text : {
 		 *(.text*)
@@ -50,6 +45,11 @@ SECTIONS
 		__bss_end = .;
 	}
 
+	. = HEADER_OFFSET;
+	.header : {
+		*(.header)
+	}
+
 	. = ALIGN(16);
 	_end = .;
 
-- 
cgit v1.2.3


From 61165d7a035f6571c7576e7f51e7230157724c8d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 13 May 2008 14:26:57 +0100
Subject: x86: fix app crashes after SMP resume

After resume on a 2cpu laptop, kernel builds collapse with a sed hang,
sh or make segfault (often on 20295564), real-time signal to cc1 etc.

Several hurdles to jump, but a manually-assisted bisect led to -rc1's
d2bcbad5f3ad38a1c09861bca7e252dde7bb8259 x86: do not zap_low_mappings
in __smp_prepare_cpus.  Though the low mappings were removed at bootup,
they were left behind (with Global flags helping to keep them in TLB)
after resume or cpu online, causing the crashes seen.

Reinstate zap_low_mappings (with local __flush_tlb_all) for each cpu_up
on x86_32.  This used to be serialized by smp_commenced_mask: that's now
gone, but a low_mappings flag will do.  No need for native_smp_cpus_done
to repeat the zap: let mem_init zap BSP's low mappings just like on UP.

(In passing, fix error code from native_cpu_up: do_boot_cpu returns a
variety of diagnostic values, Dprintk what it says but convert to -EIO.
And save_pg_dir separately before zap_low_mappings: doesn't matter now,
but zapping twice in succession wiped out resume's swsusp_pg_dir.)

That worked well on the duo and one quad, but wouldn't boot 3rd or 4th
cpu on P4 Xeon, oopsing just after unlock_ipi_call_lock.  The TLB flush
IPI now being sent reveals a long-standing bug: the booting cpu has its
APIC readied in smp_callin at the top of start_secondary, but isn't put
into the cpu_online_map until just before that unlock_ipi_call_lock.

So native_smp_call_function_mask to online cpus would send_IPI_allbutself,
including the cpu just coming up, though it has been excluded from the
count to wait for: by the time it handles the IPI, the call data on
native_smp_call_function_mask's stack may well have been overwritten.

So fall back to send_IPI_mask while cpu_online_map does not match
cpu_callout_map: perhaps there's a better APICological fix to be
made at the start_secondary end, but I wouldn't know that.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smp.c     |  3 ++-
 arch/x86/kernel/smpboot.c | 24 +++++++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 8f75893a646..0cb7aadc87c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -231,7 +231,8 @@ native_smp_call_function_mask(cpumask_t mask,
 	wmb();
 
 	/* Send a message to other CPUs */
-	if (cpus_equal(mask, allbutself))
+	if (cpus_equal(mask, allbutself) &&
+	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6b087ab6cd8..38988491c62 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -86,6 +86,7 @@ void *x86_bios_cpu_apicid_early_ptr;
 
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
+static int low_mappings;
 #endif
 
 /* State of each CPU */
@@ -326,6 +327,12 @@ static void __cpuinit start_secondary(void *unused)
 		enable_8259A_irq(0);
 	}
 
+#ifdef CONFIG_X86_32
+	while (low_mappings)
+		cpu_relax();
+	__flush_tlb_all();
+#endif
+
 	/* This must be done before setting cpu_online_map */
 	set_cpu_sibling_map(raw_smp_processor_id());
 	wmb();
@@ -1040,14 +1047,20 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 #ifdef CONFIG_X86_32
 	/* init low mem mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
-			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+		min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
-#endif
+	low_mappings = 1;
 
 	err = do_boot_cpu(apicid, cpu);
-	if (err < 0) {
+
+	zap_low_mappings();
+	low_mappings = 0;
+#else
+	err = do_boot_cpu(apicid, cpu);
+#endif
+	if (err) {
 		Dprintk("do_boot_cpu failed %d\n", err);
-		return err;
+		return -EIO;
 	}
 
 	/*
@@ -1259,9 +1272,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	setup_ioapic_dest();
 #endif
 	check_nmi_watchdog();
-#ifdef CONFIG_X86_32
-	zap_low_mappings();
-#endif
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v1.2.3


From 8c45a4e4f2b9bed6b6c54aaafc89e906284ccdf2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 12 May 2008 19:31:20 -0700
Subject: x86: early_init_centaur(): use set_cpu_cap()

arch/x86/kernel/setup_64.c:954: warning: passing argument 2 of 'set_bit' from incompatible pointer type

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f2fc8feb727..6dff1286ad8 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -951,7 +951,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x6 && c->x86_model >= 0xf)
-		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 }
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
-- 
cgit v1.2.3


From 89804c022fe32541f5dd40a69e48ff4678d9ad24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 13 May 2008 10:36:22 +0200
Subject: x86: fix csum_partial() export

Fix this symbol export problem:

    Building modules, stage 2.
    MODPOST 193 modules
    ERROR: "csum_partial" [fs/reiserfs/reiserfs.ko] undefined!
    make[1]: *** [__modpost] Error 1
    make: *** [modules] Error 2

This is due to a known weakness of symbol exports: if a symbol's
only in-core user is an EXPORT_SYMBOL from a lib-y section, the
symbol is not linked in.

The solution is to move the export to x8664_ksyms_64.c - but the real
solution would be to fix kbuild.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/x8664_ksyms_64.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 58882f9f263..f6c05d0410f 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,6 +2,7 @@
    All C exports should go in the respective C files. */
 
 #include <linux/module.h>
+#include <net/checksum.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -29,6 +30,8 @@ EXPORT_SYMBOL(__copy_from_user_inatomic);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 
+EXPORT_SYMBOL(csum_partial);
+
 /*
  * Export string functions. We normally rely on gcc builtin for most of these,
  * but gcc sometimes decides not to inline them.
-- 
cgit v1.2.3


From 1f465f4e475454b8bb590846c50a9d16e8046f3d Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 9 May 2008 15:43:44 -0700
Subject: x86: user_regset_view table fix for ia32 on 64-bit

The user_regset_view table for the 32-bit regsets on the 64-bit build had
the wrong sizes for the FP regsets.  This bug had no user-visible effect
(just on kernel modules using the user_regset interfaces and the like).
But the fix is trivial and risk-free.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fb03ef380f0..a7835f28293 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1303,6 +1303,9 @@ static const struct user_regset_view user_x86_64_view = {
 #define genregs32_get		genregs_get
 #define genregs32_set		genregs_set
 
+#define user_i387_ia32_struct	user_i387_struct
+#define user32_fxsr_struct	user_fxsr_struct
+
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1315,13 +1318,13 @@ static const struct user_regset x86_32_regsets[] = {
 	},
 	[REGSET_FP] = {
 		.core_note_type = NT_PRFPREG,
-		.n = sizeof(struct user_i387_struct) / sizeof(u32),
+		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
 		.size = sizeof(u32), .align = sizeof(u32),
 		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
 	},
 	[REGSET_XFP] = {
 		.core_note_type = NT_PRXFPREG,
-		.n = sizeof(struct user_i387_struct) / sizeof(u32),
+		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
 		.size = sizeof(u32), .align = sizeof(u32),
 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
 	},
-- 
cgit v1.2.3


From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 May 2008 18:19:16 -0400
Subject: [PATCH] take init_files to fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/init_task.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3d01e47777d..a4f93b4120c 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -11,7 +11,6 @@
 #include <asm/desc.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-- 
cgit v1.2.3


From a738d897b7b03b83488ae74a9bc03d26a2875dc6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 14 May 2008 08:47:40 +0200
Subject: x86: remove mwait capability C-state check

Vegard Nossum reports:

| powertop shows between 200-400 wakeups/second with the description
| "<kernel IPI>: Rescheduling interrupts" when all processors have load (e.g.
| I need to run two busy-loops on my 2-CPU system for this to show up).
|
| The bisect resulted in this commit:
|
| commit 0c07ee38c9d4eb081758f5ad14bbffa7197e1aec
| Date:   Wed Jan 30 13:33:16 2008 +0100
|
|     x86: use the correct cpuid method to detect MWAIT support for C states

remove the functional effects of this patch and make mwait unconditional.

A future patch will turn off mwait on specific CPUs where that causes
power to be wasted.

Bisected-by: Vegard Nossum <vegard.nossum@gmail.com>
Tested-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 67e9b4a1e89..c7b6a694ca2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -99,15 +99,6 @@ static void mwait_idle(void)
 		local_irq_enable();
 }
 
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
-	if (force_mwait)
-		return 1;
-	/* Any C1 states supported? */
-	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
 /*
  * On SMP it's slightly faster (but much more power-consuming!)
  * to poll the ->work.need_resched flag instead of waiting for the
@@ -131,7 +122,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 			" performance may degrade.\n");
 	}
 #endif
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+	if (cpu_has(c, X86_FEATURE_MWAIT)) {
 		/*
 		 * Skip, if setup has overridden idle.
 		 * One CPU supports mwait => All CPUs supports mwait
-- 
cgit v1.2.3


From e9623b35599fcdbc00c16535cbefbb4d5578f4ab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 16 May 2008 22:55:26 +0200
Subject: x86: disable mwait for AMD family 10H/11H CPUs

The previous revert of 0c07ee38c9d4eb081758f5ad14bbffa7197e1aec left
out the mwait disable condition for AMD family 10H/11H CPUs.

Andreas Herrman said:

It depends on the CPU. For AMD CPUs that support MWAIT this is wrong.
Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings then
depend on a clock divisor and current Pstate of the core.

If all cores of a processor are in halt state (C1) the processor can
enter the C1E (C1 enhanced) state. If mwait is used this will never
happen.

Thus HLT saves more power than MWAIT here.

It might be best to switch off the mwait flag for these AMD CPU
families like it was introduced with commit
f039b754714a422959027cb18bb33760eb8153f0 (x86: Don't use MWAIT on AMD
Family 10)

Re-add the AMD families 10H/11H check and disable the mwait usage for
those.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c7b6a694ca2..ba370dc8685 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,6 +110,33 @@ static void poll_idle(void)
 	cpu_relax();
 }
 
+/*
+ * mwait selection logic:
+ *
+ * It depends on the CPU. For AMD CPUs that support MWAIT this is
+ * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
+ * then depend on a clock divisor and current Pstate of the core. If
+ * all cores of a processor are in halt state (C1) the processor can
+ * enter the C1E (C1 enhanced) state. If mwait is used this will never
+ * happen.
+ *
+ * idle=mwait overrides this decision and forces the usage of mwait.
+ */
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+	if (force_mwait)
+		return 1;
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		switch(c->x86) {
+		case 0x10:
+		case 0x11:
+			return 0;
+		}
+	}
+	return 1;
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 	static int selected;
@@ -122,7 +149,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 			" performance may degrade.\n");
 	}
 #endif
-	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
 		/*
 		 * Skip, if setup has overridden idle.
 		 * One CPU supports mwait => All CPUs supports mwait
-- 
cgit v1.2.3


From eba9fe93a2959ec7f195c47c9db6ce7b5114ce1f Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Tue, 18 Mar 2008 15:24:32 -0500
Subject: [CPUFREQ] powernow-k8: improve error messages

The most common error with powernow-k8 is an ACPI _PSS error
caused either by failure to load the ACPI processor module
or a bad parse of the _PSS object.  Make the error message
returned to the user in these situations more straightforward
and easier to understand.

-Mark Langsdorf
Operating System Research Center
AMD

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 46d4034d9f3..206791eb46e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		 * an UP version, and is deprecated by AMD.
 		 */
 		if (num_online_cpus() != 1) {
-			printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
+#ifndef CONFIG_ACPI_PROCESSOR
+			printk(KERN_ERR PFX "ACPI Processor support is required "
+			       "for SMP systems but is absent. Please load the "
+			       "ACPI Processor module before starting this "
+			       "driver.\n");
+#else
+			printk(KERN_ERR PFX "Your BIOS does not provide ACPI "
+			       "_PSS objects in a way that Linux understands. "
+			       "Please report this to the Linux ACPI maintainers"
+			       " and complain to your BIOS vendor.\n");
+#endif
 			kfree(data);
 			return -ENODEV;
 		}
 		if (pol->cpu != 0) {
-			printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n");
+			printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than "
+			       "CPU0. Complain to your BIOS vendor.\n");
 			kfree(data);
 			return -ENODEV;
 		}
-- 
cgit v1.2.3


From 667ad4f70110357e8f024e81741c7bd1d7906e7d Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 8 May 2008 22:10:01 +0200
Subject: [CPUFREQ] Crusoe: longrun cpufreq module reports false min freq

The longrun cpufreq module reports a false minimum frequency 3MHz on
300-600MHz Crusoe processor.  This may be due to a calculation bug
in the module.

Original patch from Kaz Sasayama <kazssym@hypercore.co.jp>
submitted as http://bugs.debian.org/468149 patch ported to x86

Cc: Kaz Sasayama <kazssym@hypercore.co.jp>
Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/longrun.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index af4a867a097..777a7ff075d 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
 	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
 		return -EIO;
 
-	edx = (eax - ebx) / (100 - ecx);
+	edx = ((eax - ebx) * 100) / (100 - ecx);
 	*low_freq = edx * 1000; /* back to kHz */
 
 	dprintk("low frequency is %u kHz\n", *low_freq);
-- 
cgit v1.2.3


From b6db80ee1331e7beaeb91b4b3d946dd16c72e388 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 18 May 2008 19:27:48 +0200
Subject: x86: fix setup of cyc2ns in tsc_64.c

When the TSC is calibrated against the PIT due to the nonavailability
of PMTIMER/HPET or due to SMI interference then the setup of the per
CPU cyc2ns variables is skipped. This is unlikely to happen but it
would definitely render sched_clock() unusable.

This was introduced with commit 53d517cdbaac704352b3d0c10fecb99e0b54572e

    x86: scale cyc_2_nsec according to CPU frequency

Update the per CPU cyc2ns variables in all exit pathes of tsc_calibrate.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
---
 arch/x86/kernel/tsc_64.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index fcc16e58609..1784b8077a1 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -227,14 +227,14 @@ void __init tsc_calibrate(void)
 	/* hpet or pmtimer available ? */
 	if (!hpet && !pm1 && !pm2) {
 		printk(KERN_INFO "TSC calibrated against PIT\n");
-		return;
+		goto out;
 	}
 
 	/* Check, whether the sampling was disturbed by an SMI */
 	if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
 		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
 		       "using PIT calibration result\n");
-		return;
+		goto out;
 	}
 
 	tsc2 = (tsc2 - tsc1) * 1000000L;
@@ -255,6 +255,7 @@ void __init tsc_calibrate(void)
 
 	tsc_khz = tsc2 / tsc1;
 
+out:
 	for_each_possible_cpu(cpu)
 		set_cyc2ns_scale(tsc_khz, cpu);
 }
-- 
cgit v1.2.3


From 9ccc906c97e34fd91dc6aaf5b69b52d824386910 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 13 May 2008 12:31:00 +0200
Subject: x86: distangle user disabled TSC from unstable

tsc_enabled is set to 0 from the command line switch "notsc" and from
the mark_tsc_unstable code. Seperate those functionalities and replace
tsc_enable with tsc_disable. This makes also the native_sched_clock()
decision when to use TSC understandable.

Preparatory patch to solve the sched_clock() issue on 32 bit.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_32.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e4790728b22..b087d691f16 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,7 @@
 
 #include "mach_timer.h"
 
-static int tsc_enabled;
+static int tsc_disabled;
 
 /*
  * On some systems the TSC frequency does not
@@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
 static int __init tsc_setup(char *str)
 {
 	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-				"cannot disable TSC completely.\n");
-	mark_tsc_unstable("user disabled TSC");
+	       "cannot disable TSC completely.\n");
+	tsc_disabled = 1;
 	return 1;
 }
 #else
@@ -120,7 +120,7 @@ unsigned long long native_sched_clock(void)
 	 *   very important for it to be as fast as the platform
 	 *   can achive it. )
 	 */
-	if (unlikely(!tsc_enabled && !tsc_unstable))
+	if (unlikely(tsc_disabled))
 		/* No locking but a rare wrong value is not a big deal: */
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
@@ -322,7 +322,6 @@ void mark_tsc_unstable(char *reason)
 {
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
-		tsc_enabled = 0;
 		printk("Marking TSC unstable due to: %s.\n", reason);
 		/* Can be called before registration */
 		if (clocksource_tsc.mult)
@@ -336,7 +335,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
-		       d->ident);
+	       d->ident);
 	tsc_unstable = 1;
 	return 0;
 }
@@ -403,8 +402,11 @@ void __init tsc_init(void)
 {
 	int cpu;
 
-	if (!cpu_has_tsc)
+	if (!cpu_has_tsc || tsc_disabled) {
+		/* Disable the TSC in case of !cpu_has_tsc */
+		tsc_disabled = 1;
 		return;
+	}
 
 	cpu_khz = calculate_cpu_khz();
 	tsc_khz = cpu_khz;
@@ -441,8 +443,6 @@ void __init tsc_init(void)
 	if (check_tsc_unstable()) {
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
-	} else
-		tsc_enabled = 1;
-
+	}
 	clocksource_register(&clocksource_tsc);
 }
-- 
cgit v1.2.3


From 74dc51a3de06aa516e3b9fdc4017b2aeb38bf44b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 18 May 2008 22:17:59 +0200
Subject: x86: disable TSC for sched_clock() when calibration failed

When the TSC calibration fails then TSC is still used in
sched_clock(). Disable it completely in that case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
---
 arch/x86/kernel/tsc_32.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index b087d691f16..068759db63d 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -413,6 +413,11 @@ void __init tsc_init(void)
 
 	if (!cpu_khz) {
 		mark_tsc_unstable("could not calculate TSC khz");
+		/*
+		 * We need to disable the TSC completely in this case
+		 * to prevent sched_clock() from using it.
+		 */
+		tsc_disabled = 1;
 		return;
 	}
 
-- 
cgit v1.2.3


From 2584a82deed7196f48066f1b1a7fad4ec5bea961 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Tue, 20 May 2008 18:18:12 -0400
Subject: x86: don't read maxlvt before checking if APIC is mapped

A check for unmapped apic was added before reading maxlvt but the early
read of maxlvt wasn't removed.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
---
 arch/x86/kernel/apic_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5910020c3f2..0633cfd0dc2 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier)
  */
 void clear_local_APIC(void)
 {
-	int maxlvt = lapic_get_maxlvt();
+	int maxlvt;
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
-- 
cgit v1.2.3


From 2ddfd20e7c55421435cbf95a5ed3dd6e423cf934 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 22 May 2008 10:37:48 +0200
Subject: namespacecheck: automated fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kvmclock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5d547..08a30986d47 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -53,7 +53,7 @@ static cycle_t kvm_clock_read(void);
  * have elapsed since the hypervisor wrote the data. So we try to account for
  * that with system time
  */
-unsigned long kvm_get_wallclock(void)
+static unsigned long kvm_get_wallclock(void)
 {
 	u32 wc_sec, wc_nsec;
 	u64 delta;
@@ -86,7 +86,7 @@ unsigned long kvm_get_wallclock(void)
 	return ts.tv_sec + 1;
 }
 
-int kvm_set_wallclock(unsigned long now)
+static int kvm_set_wallclock(unsigned long now)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From db9f600b96c16bb3c7f094e294fbdd370226ad86 Mon Sep 17 00:00:00 2001
From: Miquel van Smoorenburg <mikevs@xs4all.net>
Date: Wed, 28 May 2008 10:31:25 +0200
Subject: x86: pci-dma.c: use __GFP_NO_OOM instead of __GFP_NORETRY

On Wed, 2008-05-28 at 04:47 +0200, Andi Kleen wrote:
> > So...  why not just remove the setting of __GFP_NORETRY?  Why is it
> > wrong to oom-kill things in this case?
>
> When the 16MB zone overflows (which can be common in some workloads)
> calling the OOM killer is pretty useless because it has barely any
> real user data [only exception would be the "only 16MB" case Alan
> mentioned]. Killing random processes in this case is bad.
>
> I think for 16MB __GFP_NORETRY is ok because there should be
> nothing freeable in there so looping is useless. Only exception would be the
> "only 16MB total" case again but I'm not sure 2.6 supports that at all
> on x86.
>
> On the other hand d_a_c() does more allocations than just 16MB, especially
> on 64bit and the other zones need different strategies.

Okay, so how about this then ?

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index c5ef1af8e79..069e843f0b9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -397,9 +397,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (dev->dma_mask == NULL)
 		return NULL;
 
-	/* Don't invoke OOM killer */
-	gfp |= __GFP_NORETRY;
-
 #ifdef CONFIG_X86_64
 	/* Why <=? Even when the mask is smaller than 4GB it is often
 	   larger than 16MB and in this case we have a chance of
@@ -410,7 +407,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 #endif
 
  again:
-	page = dma_alloc_pages(dev, gfp, get_order(size));
+	/* Don't invoke OOM killer or retry in lower 16MB DMA zone */
+	page = dma_alloc_pages(dev,
+		(gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size));
 	if (page == NULL)
 		return NULL;
 
-- 
cgit v1.2.3


From f529626a86d61897862aa1bbbb4537773209238e Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Thu, 29 May 2008 00:30:21 -0700
Subject: suspend-vs-iommu: prevent suspend if we could not resume

iommu/gart support misses suspend/resume code, which can do bad stuff,
including memory corruption on resume.  Prevent system suspend in case we
would be unable to resume.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Tested-by: Patrick <ragamuffin@datacomm.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695..aa8ec928caa 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#include <linux/sysdev.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
 	return aper_base;
 }
 
+static int gart_resume(struct sys_device *dev)
+{
+	return 0;
+}
+
+static int gart_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return -EINVAL;
+}
+
+static struct sysdev_class gart_sysdev_class = {
+	.name = "gart",
+	.suspend = gart_suspend,
+	.resume = gart_resume,
+
+};
+
+static struct sys_device device_gart = {
+	.id	= 0,
+	.cls	= &gart_sysdev_class,
+};
+
 /*
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	unsigned aper_base, new_aper_base;
 	struct pci_dev *dev;
 	void *gatt;
-	int i;
+	int i, error;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 		pci_write_config_dword(dev, 0x90, ctl);
 	}
+
+	error = sysdev_class_register(&gart_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_gart);
+	if (error)
+		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
 	flush_gart();
 
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
-- 
cgit v1.2.3


From fb3bbd6a663fe972611676381adc4c60ddfe61ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 22 May 2008 18:22:30 -0700
Subject: x86: fix APIC warning on 32bit v2

for http://bugzilla.kernel.org/show_bug.cgi?id=10613

BIOS bug, APIC version is 0 for CPU#0! fixing up to 0x10. (tell your hw vendor)

v2: fix 64 bit compilation

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Gabriel C <nix.or.die@googlemail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41..33c5216fd3e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 
 static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 {
+	unsigned int ver = 0;
+
 	if (!enabled) {
 		++disabled_cpus;
 		return;
 	}
 
-	generic_processor_info(id, 0);
+#ifdef CONFIG_X86_32
+	if (boot_cpu_physical_apicid != -1U)
+		ver = apic_version[boot_cpu_physical_apicid];
+#endif
+
+	generic_processor_info(id, ver);
 }
 
 static int __init
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address)
 	mp_lapic_addr = address;
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
-	if (boot_cpu_physical_apicid == -1U)
+	if (boot_cpu_physical_apicid == -1U) {
 		boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+#ifdef CONFIG_X86_32
+		apic_version[boot_cpu_physical_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
+#endif
+	}
 }
 
 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
-- 
cgit v1.2.3


From deef325086c3897393b8f7d6bccd03405244fe18 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 12 May 2008 15:44:38 +0200
Subject: x86: disable preemption in native_smp_prepare_cpus

Priit Laes reported the following warning:

Call Trace:
 [<ffffffff8022f1e1>] warn_on_slowpath+0x51/0x63
 [<ffffffff80282e48>] sys_ioctl+0x2d/0x5d
 [<ffffffff805185ff>] _spin_lock+0xe/0x24
 [<ffffffff80227459>] task_rq_lock+0x3d/0x73
 [<ffffffff805133c3>] set_cpu_sibling_map+0x336/0x350
 [<ffffffff8021c1b8>] read_apic_id+0x30/0x62
 [<ffffffff806d921d>] verify_local_APIC+0x90/0x138
 [<ffffffff806d84b5>] native_smp_prepare_cpus+0x1f9/0x305
 [<ffffffff806ce7b1>] kernel_init+0x59/0x2d9
 [<ffffffff80518a26>] _spin_unlock_irq+0x11/0x2b
 [<ffffffff8020bf48>] child_rip+0xa/0x12
 [<ffffffff806ce758>] kernel_init+0x0/0x2d9
 [<ffffffff8020bf3e>] child_rip+0x0/0x12

fix this by generally disabling preemption in native_smp_prepare_cpus().

Reported-and-bisected-by: Priit Laes <plaes@plaes.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 38988491c62..56078d61c79 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void)
  */
 void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
+	preempt_disable();
 	nmi_watchdog_default();
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
@@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
 		disable_smp();
-		return;
+		goto out;
 	}
 
 	preempt_disable();
@@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	printk(KERN_INFO "CPU%d: ", 0);
 	print_cpu_info(&cpu_data(0));
 	setup_boot_clock();
+out:
+	preempt_enable();
 }
 /*
  * Early setup to make printk work.
-- 
cgit v1.2.3


From e8a496ac8cd00cabbdaa373db4818a9ad19a1c5a Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 23 May 2008 16:26:37 -0700
Subject: x86: fix broken math-emu with lazy allocation of fpu area

Fix the math emulation that got broken with the recent lazy allocation of FPU
area. init_fpu() need to be added for the math-emulation path aswell
for the FPU area allocation.

math emulation enabled kernel booted fine with this, in the presence
of "no387 nofxsr" boot param.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: hpa@zytor.com
Cc: mingo@elte.hu
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/i387.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index e03cc952f23..eb9ddd8efb8 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void)
 
 void __init init_thread_xstate(void)
 {
+	if (!HAVE_HWFP) {
+		xstate_size = sizeof(struct i387_soft_struct);
+		return;
+	}
+
 	if (cpu_has_fxsr)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 #ifdef CONFIG_X86_32
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void)
 int init_fpu(struct task_struct *tsk)
 {
 	if (tsk_used_math(tsk)) {
-		if (tsk == current)
+		if (HAVE_HWFP && tsk == current)
 			unlazy_fpu(tsk);
 		return 0;
 	}
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk)
 			return -ENOMEM;
 	}
 
+#ifdef CONFIG_X86_32
+	if (!HAVE_HWFP) {
+		memset(tsk->thread.xstate, 0, xstate_size);
+		finit();
+		set_stopped_child_used_math(tsk);
+		return 0;
+	}
+#endif
+
 	if (cpu_has_fxsr) {
 		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
 
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	if (!HAVE_HWFP)
-		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
-
 	ret = init_fpu(target);
 	if (ret)
 		return ret;
 
+	if (!HAVE_HWFP)
+		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+
 	if (!cpu_has_fxsr) {
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 					   &target->thread.xstate->fsave, 0,
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	if (!HAVE_HWFP)
-		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
-
 	ret = init_fpu(target);
 	if (ret)
 		return ret;
 
 	set_stopped_child_used_math(target);
 
+	if (!HAVE_HWFP)
+		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+
 	if (!cpu_has_fxsr) {
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &target->thread.xstate->fsave, 0, -1);
@@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
 {
 	int err;
+	struct task_struct *tsk = current;
 
-	if (HAVE_HWFP) {
-		struct task_struct *tsk = current;
-
+	if (HAVE_HWFP)
 		clear_fpu(tsk);
 
-		if (!used_math()) {
-			err = init_fpu(tsk);
-			if (err)
-				return err;
-		}
+	if (!used_math()) {
+		err = init_fpu(tsk);
+		if (err)
+			return err;
+	}
 
+	if (HAVE_HWFP) {
 		if (cpu_has_fxsr)
 			err = restore_i387_fxsave(buf);
 		else
-- 
cgit v1.2.3


From cd76374e9de4501acc74f833dc6cb5e7a5dca115 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Thu, 29 May 2008 00:30:21 -0700
Subject: suspend-vs-iommu: prevent suspend if we could not resume

iommu/gart support misses suspend/resume code, which can do bad stuff,
including memory corruption on resume.  Prevent system suspend in case we
would be unable to resume.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Tested-by: Patrick <ragamuffin@datacomm.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695..aa8ec928caa 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#include <linux/sysdev.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
 	return aper_base;
 }
 
+static int gart_resume(struct sys_device *dev)
+{
+	return 0;
+}
+
+static int gart_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return -EINVAL;
+}
+
+static struct sysdev_class gart_sysdev_class = {
+	.name = "gart",
+	.suspend = gart_suspend,
+	.resume = gart_resume,
+
+};
+
+static struct sys_device device_gart = {
+	.id	= 0,
+	.cls	= &gart_sysdev_class,
+};
+
 /*
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	unsigned aper_base, new_aper_base;
 	struct pci_dev *dev;
 	void *gatt;
-	int i;
+	int i, error;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 		pci_write_config_dword(dev, 0x90, ctl);
 	}
+
+	error = sysdev_class_register(&gart_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_gart);
+	if (error)
+		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
 	flush_gart();
 
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
-- 
cgit v1.2.3


From 870568b39064cab2dd971fe57969916036982862 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 2 Jun 2008 15:57:27 -0700
Subject: x86, fpu: fix CONFIG_PREEMPT=y corruption of application's FPU stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jürgen Mell reported an FPU state corruption bug under CONFIG_PREEMPT,
and bisected it to commit v2.6.19-1363-gacc2076, "i386: add sleazy FPU
optimization".

Add tsk_used_math() checks to prevent calling math_state_restore()
which can sleep in the case of !tsk_used_math(). This prevents
making a blocking call in __switch_to().

Apparently "fpu_counter > 5" check is not enough, as in some signal handling
and fork/exec scenarios, fpu_counter > 5 and !tsk_used_math() is possible.

It's a side effect though. This is the failing scenario:

process 'A' in save_i387_ia32() just after clear_used_math()

Got an interrupt and pre-empted out.

At the next context switch to process 'A' again, kernel tries to restore
the math state proactively and sees a fpu_counter > 0 and !tsk_used_math()

This results in init_fpu() during the __switch_to()'s math_state_restore()

And resulting in fpu corruption which will be saved/restored
(save_i387_fxsave and restore_i387_fxsave) during the remaining
part of the signal handling after the context switch.

Bisected-by: Jürgen Mell <j.mell@t-online.de>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Jürgen Mell <j.mell@t-online.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
---
 arch/x86/kernel/process_32.c | 5 ++++-
 arch/x86/kernel/process_64.c | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60..6d5483356e7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -649,8 +649,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
 	 * chances of needing FPU soon are obviously high now
+	 *
+	 * tsk_used_math() checks prevent calling math_state_restore(),
+	 * which can sleep in the case of !tsk_used_math()
 	 */
-	if (next_p->fpu_counter > 5)
+	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
 		math_state_restore();
 
 	/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988..ac54ff56df8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -658,8 +658,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
 	 * chances of needing FPU soon are obviously high now
+	 *
+	 * tsk_used_math() checks prevent calling math_state_restore(),
+	 * which can sleep in the case of !tsk_used_math()
 	 */
-	if (next_p->fpu_counter>5)
+	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
 		math_state_restore();
 	return prev_p;
 }
-- 
cgit v1.2.3


From b7f09ae583c49d28b2796d2fa5893dcf822e3a10 Mon Sep 17 00:00:00 2001
From: Miquel van Smoorenburg <mikevs@xs4all.net>
Date: Thu, 5 Jun 2008 18:14:44 +0200
Subject: x86, pci-dma.c: don't always add __GFP_NORETRY to gfp

Currently arch/x86/kernel/pci-dma.c always adds __GFP_NORETRY
to the allocation flags, because it wants to be reasonably
sure not to deadlock when calling alloc_pages().

But really that should only be done in two cases:
- when allocating memory in the lower 16 MB DMA zone.
  If there's no free memory there, waiting or OOM killing is of no use
- when optimistically trying an allocation in the DMA32 zone
  when dma_mask < DMA_32BIT_MASK hoping that the allocation
  happens to fall within the limits of the dma_mask

Also blindly adding __GFP_NORETRY to the the gfp variable might
not be a good idea since we then also use it when calling
dma_ops->alloc_coherent(). Clearing it might also not be a
good idea, dma_alloc_coherent()'s caller might have set it
on purpose. The gfp variable should not be clobbered.

[ mingo@elte.hu: converted to delta patch ontop of previous version. ]

Signed-off-by: Miquel van Smoorenburg <miquels@cistron.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 069e843f0b9..dc00a1331ac 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	struct page *page;
 	unsigned long dma_mask = 0;
 	dma_addr_t bus;
+	int noretry = 0;
 
 	/* ignore region specifiers */
 	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -397,19 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (dev->dma_mask == NULL)
 		return NULL;
 
+	/* Don't invoke OOM killer or retry in lower 16MB DMA zone */
+	if (gfp & __GFP_DMA)
+		noretry = 1;
+
 #ifdef CONFIG_X86_64
 	/* Why <=? Even when the mask is smaller than 4GB it is often
 	   larger than 16MB and in this case we have a chance of
 	   finding fitting memory in the next higher zone first. If
 	   not retry with true GFP_DMA. -AK */
-	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
+	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
 		gfp |= GFP_DMA32;
+		if (dma_mask < DMA_32BIT_MASK)
+			noretry = 1;
+	}
 #endif
 
  again:
-	/* Don't invoke OOM killer or retry in lower 16MB DMA zone */
 	page = dma_alloc_pages(dev,
-		(gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size));
+		noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
 	if (page == NULL)
 		return NULL;
 
-- 
cgit v1.2.3


From 3703f39965a197ebd91743fc38d0f640606b8da3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 4 Jun 2008 18:13:37 +0200
Subject: geode: fix modular build

-tip testing found this build bug:

 MODPOST 331 modules
 ERROR: "geode_mfgpt_toggle_event" [drivers/watchdog/geodewdt.ko] undefined!
 ERROR: "geode_mfgpt_alloc_timer" [drivers/watchdog/geodewdt.ko] undefined!
 make[1]: *** [__modpost] Error 1
 make: *** [modules] Error 2

with this config:

  http://redhat.com/~mingo/misc/config-Wed_Jun__4_18_01_59_CEST_2008.bad

export those symbols.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mfgpt_32.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3cad17fe026..07c0f828f48 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
 	wrmsr(msr, value, dummy);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
 
 int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
 {
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain)
 	/* No timers available - too bad */
 	return -1;
 }
+EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
 
 
 #ifdef CONFIG_GEODE_MFGPT_TIMER
-- 
cgit v1.2.3


From 86b2b70e156203149c3861455feec54bc4906e6d Mon Sep 17 00:00:00 2001
From: Joe Korty <joe.korty@ccur.com>
Date: Mon, 2 Jun 2008 17:21:06 -0400
Subject: x86: fix asm warning in head_32.S

On Mon, May 19, 2008 at 04:10:02PM -0700, Linus Torvalds wrote:
> It also causes these warnings on 32-bit PAE:
>
> 	  AS      arch/x86/kernel/head_32.o
> 	arch/x86/kernel/head_32.S: Assembler messages:
> 	arch/x86/kernel/head_32.S:225: Warning: left operand is a bignum; integer 0 assumed
> 	arch/x86/kernel/head_32.S:609: Warning: left operand is a bignum; integer 0 assumed
>
> and I do not see why (the end result seems to be identical).

Fix head_32.S gcc bignum warnings when CONFIG_PAE=y.

    arch/x86/kernel/head_32.S: Assembler messages:
    arch/x86/kernel/head_32.S:225: Warning: left operand is a bignum; integer 0 assumed
    arch/x86/kernel/head_32.S:609: Warning: left operand is a bignum; integer 0 assumed

The assembler was stumbling over the 64-bit constant 0x100000000 in the
KPMDS #define.

Testing: a cmp(1) on head_32.o before and after shows the binary is unchanged.

Signed-off-by: Joe Korty <joe.korty@ccur.com
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Gabriel C <nix.or.die@googlemail.com>
Cc: Keith Packard <keithp@keithp.com>
Cc: "Pallipadi Venkatesh" <venkatesh.pallipadi@intel.com>
Cc: Eric Anholt <eric@anholt.net>
Cc: "Siddha Suresh B" <suresh.b.siddha@intel.com>
Cc: bugme-daemon@bugzilla.kernel.org
Cc: airlied@linux.ie
Cc: "Barnes Jesse" <jesse.barnes@intel.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc73768a9..f7357cc0162 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -189,7 +189,7 @@ default_entry:
 	 * this stage.
 	 */
 
-#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
 
 	xorl %ebx,%ebx				/* %ebx is kept at zero */
 
-- 
cgit v1.2.3


From 0b6a39f7ebcb1c82587ce35b401c513eed41ac5c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 9 Jun 2008 13:29:43 +0200
Subject: Revert "x86: fix ioapic bug again"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 6e908947b4995bc0e551a8257c586d5c3e428201.

Németh Márton reported:

| there is a problem in 2.6.26-rc3 which was not there in case of
| 2.6.25: the CPU wakes up ~90,000 times per sec instead of ~60 per sec.
|
| I also "git bisected" the problem, the result is:
|
| 6e908947b4995bc0e551a8257c586d5c3e428201 is first bad commit
| commit 6e908947b4995bc0e551a8257c586d5c3e428201
| Author: Ingo Molnar <mingo@elte.hu>
| Date:   Fri Mar 21 14:32:36 2008 +0100
|
|     x86: fix ioapic bug again

the original problem is fixed by Maciej W. Rozycki in the tip/x86/apic
branch (confirmed by Márton), but those changes are too intrusive for
v2.6.26 so we'll go for the less intrusive (repeated) revert now.

Reported-and-bisected-by: Németh Márton <nm127@freemail.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c | 12 ++----------
 arch/x86/kernel/nmi_32.c     |  9 ++-------
 2 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fd..4dc8600d9d2 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2130,14 +2130,10 @@ static inline void __init check_timer(void)
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
-	unsigned int ver;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
-
 	/*
 	 * get/set the timer IRQ vector:
 	 */
@@ -2150,15 +2146,11 @@ static inline void __init check_timer(void)
 	 * mode for the 8259A whenever interrupts are routed
 	 * through I/O APICs.  Also IRQ0 has to be enabled in
 	 * the 8259A which implies the virtual wire has to be
-	 * disabled in the local APIC.  Finally timer interrupts
-	 * need to be acknowledged manually in the 8259A for
-	 * timer_interrupt() and for the i82489DX when using
-	 * the NMI watchdog.
+	 * disabled in the local APIC.
 	 */
 	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
-	timer_ack = !cpu_has_tsc;
-	timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+	timer_ack = 1;
 	if (timer_over_8254 > 0)
 		enable_8259A_irq(0);
 
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 11b14bbaa61..84160f74eeb 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -26,7 +26,6 @@
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
-#include <asm/timer.h>
 
 #include "mach_traps.h"
 
@@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void)
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
 	if (!prev_nmi_count)
-		goto error;
+		return -1;
 
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
@@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active)) {
 		kfree(prev_nmi_count);
 		atomic_set(&nmi_active, -1);
-		goto error;
+		return -1;
 	}
 	printk("OK.\n");
 
@@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void)
 
 	kfree(prev_nmi_count);
 	return 0;
-error:
-	timer_ack = !cpu_has_tsc;
-
-	return -1;
 }
 
 static int __init setup_nmi_watchdog(char *str)
-- 
cgit v1.2.3


From e32e58a96de4ac35a03349db2ab69f263ded958f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 6 Jun 2008 10:14:08 +0200
Subject: x86: fix lockdep warning during suspend-to-ram

Andrew Morton wrote:

> I've been seeing the below for a long time during suspend-to-ram on the Vaio.
>
>
> PM: Syncing filesystems ... done.
> PM: Preparing system for mem sleep
> Freezing user space processes ... <4>------------[ cut here ]------------
> WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x127()
> Modules linked in: i915 drm ipw2200 sonypi ipv6 autofs4 hidp l2cap bluetooth sunrpc nf_conntrack_netbios_ns ipt_REJECT nf_conntrack_ipv4 xt_state nf_conntrack xt_tcpudp iptable_filter ip_tables x_tables acpi_cpufreq nvram ohci1394 ieee1394 ehci_hcd uhci_hcd sg joydev snd_hda_intel snd_seq_dummy sr_mod snd_seq_oss cdrom snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss ieee80211 pcspkr ieee80211_crypt snd_pcm i2c_i801 snd_timer i2c_core ide_pci_generic piix snd soundcore snd_page_alloc button ext3 jbd ide_disk ide_core [last unloaded: ipw2200]
> Pid: 3250, comm: zsh Not tainted 2.6.26-rc5 #1
>  [<c011c5f5>] warn_on_slowpath+0x41/0x6d
>  [<c01080e6>] ? native_sched_clock+0x82/0x96
>  [<c013789c>] ? mark_held_locks+0x41/0x5c
>  [<c0315688>] ? _spin_unlock_irqrestore+0x36/0x58
>  [<c0137a29>] ? trace_hardirqs_on+0xe6/0x10d
>  [<c0138637>] ? __lock_acquire+0xae3/0xb2b
>  [<c0313413>] ? schedule+0x39b/0x3b4
>  [<c0135596>] check_flags+0x4c/0x127
>  [<c01386b9>] lock_acquire+0x3a/0x86
>  [<c0315075>] _spin_lock+0x26/0x53
>  [<c0140660>] ? refrigerator+0x13/0xc3
>  [<c0140660>] refrigerator+0x13/0xc3
>  [<c012684a>] get_signal_to_deliver+0x3c/0x31e
>  [<c0102fe7>] do_notify_resume+0x91/0x6ee
>  [<c01359fd>] ? lock_release_holdtime+0x50/0x56
>  [<c0315688>] ? _spin_unlock_irqrestore+0x36/0x58
>  [<c0235d24>] ? read_chan+0x0/0x58c
>  [<c0137a29>] ? trace_hardirqs_on+0xe6/0x10d
>  [<c0315694>] ? _spin_unlock_irqrestore+0x42/0x58
>  [<c0230afa>] ? tty_ldisc_deref+0x5c/0x63
>  [<c0233104>] ? tty_read+0x66/0x98
>  [<c014b3f0>] ? audit_syscall_exit+0x2aa/0x2c5
>  [<c0109430>] ? do_syscall_trace+0x6b/0x16f
>  [<c0103a9c>] work_notifysig+0x13/0x1b
>  =======================
> ---[ end trace 25b49fe59a25afa5 ]---
> possible reason: unannotated irqs-off.
> irq event stamp: 58919
> hardirqs last  enabled at (58919): [<c0103afd>] syscall_exit_work+0x11/0x26

Joy - I so love entry.S

Best I can make of it:

syscall_exit_work
  resume_userspace
    DISABLE_INTERRUPTS
    (no TRACE_IRQS_OFF)
      work_pending
        work_notifysig
          do_notify_resume()
            do_signal()
              get_signal_to_deliver()
                try_to_freeze()
                  refrigerator()
                    task_lock() -> check_flags() -> BANG

The normal path is:

syscall_exit_work
  resume_userspace
    DISABLE_INTERRUPTS
    restore_all
      TRACE_IRQS_IRET
      iret

No idea why that would not warn..

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2a609dc3271..c778e4fa55a 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -248,6 +248,7 @@ ENTRY(resume_userspace)
  	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
 					# int/exception return?
-- 
cgit v1.2.3


From 4461145ef1be92851c230f858f6b6f457c99670f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 12 Jun 2008 08:55:59 +0200
Subject: x86, lockdep: fix "WARNING: at kernel/lockdep.c:2658
 check_flags+0x4c/0x128()"

Alessandro Suardi reported:
> Recently upgraded my FC6 desktop to Fedora 9; with the
>  latest nautilus RPM updates my VNC session went nuts
>  with nautilus pegging the CPU for everything that breathed.
>
> I now reverted to an earlier nautilus package, but during
>  the peak CPU period my kernel spat this:
>
> [314185.623294] ------------[ cut here ]------------
> [314185.623414] WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x128()
> [314185.623514] Modules linked in: iptable_filter ip_tables x_tables
> sunrpc ipv6 fuse snd_via82xx snd_ac97_codec ac97_bus snd_mpu401_uart
> snd_rawmidi via686a hwmon parport_pc sg parport uhci_hcd ehci_hcd
> [314185.623924] Pid: 12314, comm: nautilus Not tainted 2.6.26-rc5-git2 #4
> [314185.624021]  [<c0115b95>] warn_on_slowpath+0x41/0x7b
> [314185.624021]  [<c010de70>] ? do_page_fault+0x2c1/0x5fd
> [314185.624021]  [<c0128396>] ? up_read+0x16/0x28
> [314185.624021]  [<c010de70>] ? do_page_fault+0x2c1/0x5fd
> [314185.624021]  [<c012fa33>] ? __lock_acquire+0xbb4/0xbc3
> [314185.624021]  [<c012d0a0>] check_flags+0x4c/0x128
> [314185.624021]  [<c012fa73>] lock_acquire+0x31/0x7d
> [314185.624021]  [<c0128cf6>] __atomic_notifier_call_chain+0x30/0x80
> [314185.624021]  [<c0128cc6>] ? __atomic_notifier_call_chain+0x0/0x80
> [314185.624021]  [<c0128d52>] atomic_notifier_call_chain+0xc/0xe
> [314185.624021]  [<c0128d81>] notify_die+0x2d/0x2f
> [314185.624021]  [<c01043b0>] do_int3+0x1f/0x4d
> [314185.624021]  [<c02f2d3b>] int3+0x27/0x2c
> [314185.624021]  =======================
> [314185.624021] ---[ end trace 1923f65a2d7bb246 ]---
> [314185.624021] possible reason: unannotated irqs-off.
> [314185.624021] irq event stamp: 488879
> [314185.624021] hardirqs last  enabled at (488879): [<c0102d67>]
> restore_nocheck+0x12/0x15
> [314185.624021] hardirqs last disabled at (488878): [<c0102dca>]
> work_resched+0x19/0x30
> [314185.624021] softirqs last  enabled at (488876): [<c011a1ba>]
> __do_softirq+0xa6/0xac
> [314185.624021] softirqs last disabled at (488865): [<c010476e>]
> do_softirq+0x57/0xa6
>
> I didn't seem to find it with some googling, so here it is.
>
> I was incidentally ltracing that process to try and find out
>  what was gulping down that much CPU (sorry, no idea
>  whether ltrace and the WARNING happened at the same
>  time or which came first) and:

Yeah, this is extremely likely to be the source of the warning.

The warning should be harmless, however.

> Box is my trusty noname K7-800, 512MB RAM; if there's
>  anything else useful I might be able to provide, just ask.

It would be interesting to see where the int3 comes from.  Too bad,
lockdep doesn't provide the register dump. The stacktrace also doesn't
go further than the int3(), I wonder if this int3 came from userspace?
The ltrace readme says "software breakpoints, like gdb", so I guess
this is the case. Yep, seems like it.

This looks relevant:

| commit fb1dac909d94ff807cd833d340c6827c3a957159
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date:   Wed Jan 16 09:51:59 2008 +0100
|
|     lockdep: more hardirq annotations for notify_die()

I'm attaching a similarly-looking patch for this case (DO_VM86_ERROR),
though I suspect it might be missing for the other cases
(DO_ERROR/DO_ERROR_INFO) as well.

Reported-by: Alessandro Suardi <alessandro.suardi@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps_32.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index bde6f63e15d..08d752de4ee 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -544,6 +544,7 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)				\
 void do_##name(struct pt_regs *regs, long error_code)			\
 {									\
+	trace_hardirqs_fixup();						\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
 						== NOTIFY_STOP)		\
 		return;							\
-- 
cgit v1.2.3


From 1da2e3d679a8ea2d9e82040359a706da0bd3bef6 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@aknet.ru>
Date: Thu, 12 Jun 2008 15:21:54 -0700
Subject: provide rtc_cmos platform device

Recently (around 2.6.25) I've noticed that RTC no longer works for me.  It
turned out this is because I use pnpacpi=off kernel option to work around
the parport_pc bugs.  I always did so, but RTC used to work fine in the
past, and now it have regressed.

The patch fixes the problem by creating the platform device for the RTC
when PNP is disabled.  This may also help running the PNP-enabled kernel
on an older PCs.

Signed-off-by: Stas Sergeev <stsp@aknet.ru>
Cc: David Brownell <david-b@pacbell.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/rtc.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 9615eee9b77..05191bbc68b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -4,6 +4,8 @@
 #include <linux/acpi.h>
 #include <linux/bcd.h>
 #include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <linux/pnp.h>
 
 #include <asm/time.h>
 #include <asm/vsyscall.h>
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void)
 }
 EXPORT_SYMBOL(native_read_tsc);
 
+
+static struct resource rtc_resources[] = {
+	[0] = {
+		.start	= RTC_PORT(0),
+		.end	= RTC_PORT(1),
+		.flags	= IORESOURCE_IO,
+	},
+	[1] = {
+		.start	= RTC_IRQ,
+		.end	= RTC_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device rtc_device = {
+	.name		= "rtc_cmos",
+	.id		= -1,
+	.resource	= rtc_resources,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+};
+
+static __init int add_rtc_cmos(void)
+{
+#ifdef CONFIG_PNP
+	if (!pnp_platform_devices)
+		platform_device_register(&rtc_device);
+#else
+	platform_device_register(&rtc_device);
+#endif /* CONFIG_PNP */
+	return 0;
+}
+device_initcall(add_rtc_cmos);
-- 
cgit v1.2.3


From 75118a82e21cafb4a82b53bb85d1c7689787e046 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 13 Jun 2008 15:47:12 -0700
Subject: x86: fix NULL pointer deref in __switch_to

Patrick McHardy reported a crash:

> > I get this oops once a day, its apparently triggered by something
> > run by cron, but the process is a different one each time.
> >
> > Kernel is -git from yesterday shortly before the -rc6 release
> > (last commit is the usb-2.6 merge, the x86 patches are missing),
> > .config is attached.
> >
> > I'll retry with current -git, but the patches that have gone in
> > since I last updated don't look related.
> >
> > [62060.043009] BUG: unable to handle kernel NULL pointer dereference at
> > 000001ff
> > [62060.043009] IP: [<c0102a9b>] __switch_to+0x2f/0x118
> > [62060.043009] *pde = 00000000
> > [62060.043009] Oops: 0002 [#1] PREEMPT

Vegard Nossum analyzed it:

> This decodes to
>
>    0:   0f ae 00                fxsave (%eax)
>
> so it's related to the floating-point context. This is the exact
> location of the crash:
>
> $ addr2line -e arch/x86/kernel/process_32.o -i ab0
> include/asm/i387.h:232
> include/asm/i387.h:262
> arch/x86/kernel/process_32.c:595
>
> ...so it looks like prev_task->thread.xstate->fxsave has become NULL.
> Or maybe it never had any other value.

Somehow (as described below) TS_USEDFPU is set but the fpu is not
allocated or freed.

Another possible FPU pre-emption issue with the sleazy FPU optimization
which was benign before but not so anymore, with the dynamic FPU allocation
patch.

New task is getting exec'd and it is prempted at the below point.

flush_thread() {
	...
	/*
	* Forget coprocessor state..
	*/
	clear_fpu(tsk);
		<----- Preemption point
	clear_used_math();
	...
}

Now when it context switches in again, as the used_math() is still set
and fpu_counter can be > 5, we will do a math_state_restore() which sets
the task's TS_USEDFPU. After it continues from the above preemption point
it does clear_used_math() and much later free_thread_xstate().

Now, at the next context switch, it is quite possible that xstate is
null, used_math() is not set and TS_USEDFPU is still set. This will
trigger unlazy_fpu() causing kernel oops.

Fix this  by clearing tsk's fpu_counter before clearing task's fpu.

Reported-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 1 +
 arch/x86/kernel/process_64.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 6d5483356e7..e2db9ac5c61 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -333,6 +333,7 @@ void flush_thread(void)
 	/*
 	 * Forget coprocessor state..
 	 */
+	tsk->fpu_counter = 0;
 	clear_fpu(tsk);
 	clear_used_math();
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ac54ff56df8..c6eb5c91e5f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -294,6 +294,7 @@ void flush_thread(void)
 	/*
 	 * Forget coprocessor state..
 	 */
+	tsk->fpu_counter = 0;
 	clear_fpu(tsk);
 	clear_used_math();
 }
-- 
cgit v1.2.3


From df17b1d990fc214f033c5588e58216ec941591e0 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Sun, 15 Jun 2008 02:19:56 +0200
Subject: x86, 32-bit: fix boot failure on TSC-less processors

Booting 2.6.26-rc6 on my 486 DX/4 fails with a "BUG: Int 6"
(invalid opcode) and a kernel halt immediately after the
kernel has been uncompressed. The BUG shows EIP pointing
to an rdtsc instruction in native_read_tsc(), invoked from
native_sched_clock().

(This error occurs so early that not even the serial console
can capture it.)

A bisection showed that this bug first occurs in 2.6.26-rc3-git7,
via commit 9ccc906c97e34fd91dc6aaf5b69b52d824386910:

>x86: distangle user disabled TSC from unstable
>
>tsc_enabled is set to 0 from the command line switch "notsc" and from
>the mark_tsc_unstable code. Seperate those functionalities and replace
>tsc_enable with tsc_disable. This makes also the native_sched_clock()
>decision when to use TSC understandable.
>
>Preparatory patch to solve the sched_clock() issue on 32 bit.
>
>Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

The core reason for this bug is that native_sched_clock() gets
called before tsc_init().

Before the commit above, tsc_32.c used a "tsc_enabled" variable
which defaulted to 0 == disabled, and which only got enabled late
in tsc_init(). Thus early calls to native_sched_clock() would skip
the TSC and use jiffies instead.

After the commit above, tsc_32.c uses a "tsc_disabled" variable
which defaults to 0, meaning that the TSC is Ok to use. Early calls
to native_sched_clock() now erroneously try to use the TSC on
!cpu_has_tsc processors, leading to invalid opcode exceptions.

My proposed fix is to initialise tsc_disabled to a "soft disabled"
state distinct from the hard disabled state set up by the "notsc"
kernel option. This fixes the native_sched_clock() problem. It also
allows tsc_init() to be simplified: instead of setting tsc_disabled = 1
on every error return, we just set tsc_disabled = 0 once when all
checks have succeeded.

I've verified that this lets my 486 boot again. I've also verified
that a Core2 machine still uses the TSC as clocksource after the patch.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc_32.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 068759db63d..65b70637ad9 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,10 @@
 
 #include "mach_timer.h"
 
-static int tsc_disabled;
+/* native_sched_clock() is called before tsc_init(), so
+   we must start with the TSC soft disabled to prevent
+   erroneous rdtsc usage on !cpu_has_tsc processors */
+static int tsc_disabled = -1;
 
 /*
  * On some systems the TSC frequency does not
@@ -402,25 +405,20 @@ void __init tsc_init(void)
 {
 	int cpu;
 
-	if (!cpu_has_tsc || tsc_disabled) {
-		/* Disable the TSC in case of !cpu_has_tsc */
-		tsc_disabled = 1;
+	if (!cpu_has_tsc || tsc_disabled > 0)
 		return;
-	}
 
 	cpu_khz = calculate_cpu_khz();
 	tsc_khz = cpu_khz;
 
 	if (!cpu_khz) {
 		mark_tsc_unstable("could not calculate TSC khz");
-		/*
-		 * We need to disable the TSC completely in this case
-		 * to prevent sched_clock() from using it.
-		 */
-		tsc_disabled = 1;
 		return;
 	}
 
+	/* now allow native_sched_clock() to use rdtsc */
+	tsc_disabled = 0;
+
 	printk("Detected %lu.%03lu MHz processor.\n",
 				(unsigned long)cpu_khz / 1000,
 				(unsigned long)cpu_khz % 1000);
-- 
cgit v1.2.3


From d3942cff620bea073fc4e3c8ed878eb1e84615ce Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Sun, 8 Jun 2008 16:16:07 +0200
Subject: x86: use BOOTMEM_EXCLUSIVE on 32-bit

This patch uses the BOOTMEM_EXCLUSIVE for crashkernel reservation also for
i386 and prints a error message on failure.

The patch is still for 2.6.26 since it is only bug fixing. The unification
of reserve_crashkernel() between i386 and x86_64 should be done for 2.6.27.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
---
 arch/x86/kernel/setup_32.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2c5f8b213e8..5a2f8e06388 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void)
 					(unsigned long)(crash_size >> 20),
 					(unsigned long)(crash_base >> 20),
 					(unsigned long)(total_mem >> 20));
+
+			if (reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_EXCLUSIVE) < 0) {
+				printk(KERN_INFO "crashkernel reservation "
+					"failed - memory is in use\n");
+				return;
+			}
+
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size,
-					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
-- 
cgit v1.2.3


From ffe6e1da86d21d7855495b5a772c93f050258f6e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jordan.crouse@amd.com>
Date: Wed, 18 Jun 2008 11:34:38 -0600
Subject: x86, geode: add a VSA2 ID for General Software

General Software writes their own VSA2 module for their version
of the Geode BIOS, which returns a different ID then the standard
VSA2.  This was causing the framebuffer driver to break for most
GSW boards.

Signed-off-by: Jordan Crouse <jordan.crouse@amd.com>
Cc: tglx@linutronix.de
Cc: linux-geode@lists.infradead.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/geode_32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index e8edd63ab00..9b08e852fd1 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -166,6 +166,8 @@ int geode_has_vsa2(void)
 	static int has_vsa2 = -1;
 
 	if (has_vsa2 == -1) {
+		u16 val;
+
 		/*
 		 * The VSA has virtual registers that we can query for a
 		 * signature.
@@ -173,7 +175,8 @@ int geode_has_vsa2(void)
 		outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
 		outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
 
-		has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG);
+		val = inw(VSA_VRC_DATA);
+		has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
 	}
 
 	return has_vsa2;
-- 
cgit v1.2.3


From 7af192c954017499ec163bc9dbaaee2e593d7ef2 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Tue, 3 Jun 2008 16:17:29 +0200
Subject: x86: Add structs and functions for paravirt clocksource

This patch adds structs for the paravirt clocksource ABI
used by both xen and kvm (pvclock-abi.h).

It also adds some helper functions to read system time and
wall clock time from a paravirtual clocksource (pvclock.[ch]).
They are based on the xen code.  They are enabled using
CONFIG_PARAVIRT_CLOCK.

Subsequent patches of this series will put the code in use.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kernel/Makefile  |   1 +
 arch/x86/kernel/pvclock.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)
 create mode 100644 arch/x86/kernel/pvclock.c

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b472..77807d4769c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 00000000000..05fbe9a0325
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are perodically updated
+ *    xen: magic shared_info page
+ *    kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+	u32 tsc_to_nsec_mul;
+	int tsc_shift;
+	u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+	u64 product;
+#ifdef __i386__
+	u32 tmp1, tmp2;
+#endif
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#ifdef __i386__
+	__asm__ (
+		"mul  %5       ; "
+		"mov  %4,%%eax ; "
+		"mov  %%edx,%4 ; "
+		"mul  %5       ; "
+		"xor  %5,%5    ; "
+		"add  %4,%%eax ; "
+		"adc  %5,%%edx ; "
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+	__asm__ (
+		"mul %%rdx ; shrd $32,%%rdx,%%rax"
+		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+	return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+					struct pvclock_vcpu_time_info *src)
+{
+	do {
+		dst->version = src->version;
+		rmb();		/* fetch version before data */
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;
+		rmb();		/* test version after fetching data */
+	} while ((src->version & 1) || (dst->version != src->version));
+
+	return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+
+	do {
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		ret = shadow.system_timestamp + offset;
+		barrier();
+	} while (version != src->version);
+
+	return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+			    struct pvclock_vcpu_time_info *vcpu_time,
+			    struct timespec *ts)
+{
+	u32 version;
+	u64 delta;
+	struct timespec now;
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec  = wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
+	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+	now.tv_sec = delta;
+
+	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
-- 
cgit v1.2.3


From f6e16d5ad463d15f285666f588cfe49495c692d9 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Tue, 3 Jun 2008 16:17:32 +0200
Subject: x86: KVM guest: Use the paravirt clocksource structs and functions

This patch updates the kvm host code to use the pvclock structs
and functions, thereby making it compatible with Xen.

The patch also fixes an initialization bug: on SMP systems the
per-cpu has two different locations early at boot and after CPU
bringup.  kvmclock must take that in account when registering the
physical address within the host.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kernel/kvmclock.c | 89 +++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 56 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d47..87edf1ceb1d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
+	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
-	delta = kvm_clock_read();
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
 
-	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,13 +148,16 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
 			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
 		pv_time_ops.sched_clock = kvm_clock_read;
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 #endif
 		machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC
-- 
cgit v1.2.3