From 58d9ce7d751fa11c6c8ea5dcd4d63b320aae1363 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 22 Jan 2007 20:40:34 -0800
Subject: [PATCH] Revert nmi_known_cpu() check during boot option parsing

Commit f2802e7f571c05f9a901b1f5bd144aa730ccc88e and its x86 version
(b7471c6da94d30d3deadc55986cc38d1ff57f9ca) adds nmi_known_cpu() check
while parsing boot options in x86_64 and i386.

With that, "nmi_watchdog=2" stops working for me on Intel Core 2 CPU
based system.

The problem is, setup_nmi_watchdog is called while parsing the boot
option and identify_cpu is not done yet.  So, the return value of
nmi_known_cpu() is not valid at this point.

So revert that check.  This should not have any adverse effect as the
nmi_known_cpu() check is done again later in enable_lapic_nmi_watchdog().

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/nmi.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index a5e34d65596..1a6f8bb8881 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -310,13 +310,7 @@ static int __init setup_nmi_watchdog(char *str)
 
 	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
 		return 0;
-	/*
-	 * If any other x86 CPU has a local APIC, then
-	 * please test the NMI stuff there and send me the
-	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
-	 */
-	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
-		return 0;  /* no lapic support */
+
 	nmi_watchdog = nmi;
 	return 1;
 }
-- 
cgit v1.2.3


From 0dbe5a111382fd1320ff4b1d889e5b8c41290619 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 22 Jan 2007 20:40:36 -0800
Subject: [PATCH] paravirt: mark the paravirt_ops export internal

The paravirt subsystem is still in flux so all exports from it are
definitely internal use only.  The APIs around this /will/ change.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/paravirt.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 3dceab5828f..e55fd05da0f 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -566,4 +566,11 @@ struct paravirt_ops paravirt_ops = {
 	.irq_enable_sysexit = native_irq_enable_sysexit,
 	.iret = native_iret,
 };
-EXPORT_SYMBOL(paravirt_ops);
+
+/*
+ * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
+ * semantics are subject to change. Hence we only do this
+ * internal-only export of this, until it gets sorted out and
+ * all lowlevel CPU ops used by modules are separately exported.
+ */
+EXPORT_SYMBOL_GPL(paravirt_ops);
-- 
cgit v1.2.3


From a1f3bb9ae4497a2ed3eac773fd7798ac33a0371f Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 26 Jan 2007 00:56:46 -0800
Subject: [PATCH] Fix CONFIG_COMPAT_VDSO

I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely.  But if it's there,
it should work properly.  Currently it's quite haphazard: both real vma and
fixmap are mapped, both are put in the two different AT_* slots, sysenter
returns to the vma address rather than the fixmap address, and core dumps yet
are another story.

This patch makes CONFIG_COMPAT_VDSO disable the real vma and use the fixmap
area consistently.  This makes it actually compatible with what the old vdso
implementation did.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/entry.S    | 4 ++++
 arch/i386/kernel/sysenter.c | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 06461b8b715..5e47683fc63 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -302,12 +302,16 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET cs, 0*/
+#ifndef CONFIG_COMPAT_VDSO
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
 	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+#else
+	pushl $SYSENTER_RETURN
+#endif
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 7de9117b5a3..454d12df59e 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -100,6 +100,7 @@ int __init sysenter_setup(void)
 	return 0;
 }
 
+#ifndef CONFIG_COMPAT_VDSO
 static struct page *syscall_nopage(struct vm_area_struct *vma,
 				unsigned long adr, int *type)
 {
@@ -187,3 +188,4 @@ int in_gate_area_no_task(unsigned long addr)
 {
 	return 0;
 }
+#endif
-- 
cgit v1.2.3


From f47aef55d9a18945fcdd7fd6bf01121ce973b91b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 26 Jan 2007 00:56:49 -0800
Subject: [PATCH] i386 vDSO: use VM_ALWAYSDUMP

This patch fixes core dumps to include the vDSO vma, which is left out now.
It removes the special-case core writing macros, which were not doing the
right thing for the vDSO vma anyway.  Instead, it uses VM_ALWAYSDUMP in the
vma; there is no need for the fixmap page to be installed.  It handles the
CONFIG_COMPAT_VDSO case by making elf_core_dump use the fake vma from
get_gate_vma after real vmas in the same way the /proc/PID/maps code does.

This changes core dumps so they no longer include the non-PT_LOAD phdrs from
the vDSO.  I made the change to add them in the first place, but in turned out
that nothing ever wanted them there since the advent of NT_AUXV.  It's cleaner
to leave them out, and just let the phdrs inside the vDSO image speak for
themselves.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/sysenter.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 454d12df59e..5da744204d1 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -79,11 +79,6 @@ int __init sysenter_setup(void)
 #ifdef CONFIG_COMPAT_VDSO
 	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#else
-	/*
-	 * In the non-compat case the ELF coredumping code needs the fixmap:
-	 */
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
 #endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
@@ -147,6 +142,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 	vma->vm_end = addr + PAGE_SIZE;
 	/* MAYWRITE to allow gdb to COW and set breakpoints */
 	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	vma->vm_flags |= VM_ALWAYSDUMP;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
 	vma->vm_ops = &syscall_vm_ops;
-- 
cgit v1.2.3


From 3453c8478a228d9b374956ea99256163f1a0c88c Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 29 Jan 2007 00:07:04 -0500
Subject: [CPUFREQ] Remove unneeded errata workaround from p4-clockmod.

This workaround unnecessarily cripples functionality to work
around an errata that doesn't seem possible to hit due to
us using the automatic clock throttling in the p4 mcheck code.

See http://lkml.org/lkml/2006/10/28/148 for complete reasoning
and lack of disconsent.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
index bec50170b75..4786fedca6e 100644
--- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
@@ -51,7 +51,6 @@ enum {
 
 
 static int has_N44_O17_errata[NR_CPUS];
-static int has_N60_errata[NR_CPUS];
 static unsigned int stock_freq;
 static struct cpufreq_driver p4clockmod_driver;
 static unsigned int cpufreq_p4_get(unsigned int cpu);
@@ -224,12 +223,6 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	case 0x0f12:
 		has_N44_O17_errata[policy->cpu] = 1;
 		dprintk("has errata -- disabling low frequencies\n");
-		break;
-
-	case 0x0f29:
-		has_N60_errata[policy->cpu] = 1;
-		dprintk("has errata -- disabling frequencies lower than 2ghz\n");
-		break;
 	}
 
 	/* get max frequency */
@@ -241,8 +234,6 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
 		if ((i<2) && (has_N44_O17_errata[policy->cpu]))
 			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000)
-			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 		else
 			p4clockmod_table[i].frequency = (stock_freq * i)/8;
 	}
-- 
cgit v1.2.3


From 8339f0008c47cdd921c73f6d53d5588b5484f93c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 29 Jan 2007 13:19:05 -0700
Subject: [PATCH] i386: In assign_irq_vector look at all vectors before giving
 up

When the world was a simple and static place setting up irqs was easy.
It sufficed to allocate a linux irq number and a find a free cpu
vector we could receive that linux irq on.  In those days it was
a safe assumption that any allocated vector was actually in use
so after one global pass through all of the vectors we would have
none left.

These days things are much more dynamic with interrupt controllers
(in the form of MSI or MSI-X) appearing on plug in cards and linux
irqs appearing and disappearing.  As these irqs come and go vectors
are allocated and freed,  invalidating the ancient assumption that all
allocated vectors stayed in use forever.

So this patch modifies the vector allocator to walk through every
possible vector before giving up, and to check to see if a vector
is in use before assigning it.  With these changes we stop leaking
freed vectors and it becomes possible to allocate and free irq vectors
all day long.

This changed was modeled after the vector allocator on x86_64 where
this limitation has already been removed.  In essence we don't update
the static variables that hold the position of the last vector we
allocated until have successfully allocated another vector.  This
allows us to detect if we have completed one complete scan through
all of the possible vectors.

Acked-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/io_apic.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 2424cc9c7b3..6a3875f81a0 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -1227,26 +1227,32 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }
 
 static int __assign_irq_vector(int irq)
 {
-	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-	int vector;
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+	int vector, offset, i;
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
 	if (irq_vector[irq] > 0)
 		return irq_vector[irq];
 
-	current_vector += 8;
-	if (current_vector == SYSCALL_VECTOR)
-		current_vector += 8;
-
-	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		offset++;
-		if (!(offset % 8))
-			return -ENOSPC;
-		current_vector = FIRST_DEVICE_VECTOR + offset;
-	}
-
 	vector = current_vector;
+	offset = current_offset;
+next:
+	vector += 8;
+	if (vector >= FIRST_SYSTEM_VECTOR) {
+		offset = (offset + 1) % 8;
+		vector = FIRST_DEVICE_VECTOR + offset;
+	}
+	if (vector == current_vector)
+		return -ENOSPC;
+	if (vector == SYSCALL_VECTOR)
+		goto next;
+	for (i = 0; i < NR_IRQ_VECTORS; i++)
+		if (irq_vector[i] == vector)
+			goto next;
+
+	current_vector = vector;
+	current_offset = offset;
 	irq_vector[irq] = vector;
 
 	return vector;
-- 
cgit v1.2.3


From 435f8a605d3b56bb96212f4d70b62ecbd0629340 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.linux-foundation.org>
Date: Fri, 2 Feb 2007 08:07:42 -0800
Subject: Revert "[PATCH] fix typo in geode_configre()@cyrix.c"

This reverts commit e4f0ae0ea63caceff37a13f281a72652b7ea71ba.

It's not wrong, but it's not right either, and everybody seems to agree
that the right fix is probably to do the ccr3 write after the ccr4 one
(and that we also should clean it up a bit).  And after that we need to
really validate that all the bits that we write to ccr4 actually do
work.

The old 2.6.19 code was insane, and basically didn't change ccr4 at all
(even though it certainly looks like it was the *intent* to do so).  So
let's revert the change that may fix things, just because it's not what
was actually ever tested when the code was written, even if it _was_ the
intent.

There's a discussion on http://lkml.org/lkml/2007/1/9/63 that was
started by the patch that now gets reverted, and that discussion may
well contain the proper long-term fix.

Suggested-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/cpu/cyrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index abcff92f994..c0c3b59de32 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -173,7 +173,7 @@ static void __cpuinit geode_configure(void)
 	ccr4 = getCx86(CX86_CCR4);
 	ccr4 |= 0x38;		/* FPU fast, DTE cache, Mem bypass */
 	
-	setCx86(CX86_CCR4, ccr4);
+	setCx86(CX86_CCR3, ccr3);
 	
 	set_cx86_memwb();
 	set_cx86_reorder();	
-- 
cgit v1.2.3


From 40c373cc3af9720d1cec0e32c3da26b1d220a95b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Riss?= <frederic.riss@gmail.com>
Date: Tue, 30 Jan 2007 21:41:17 +0100
Subject: [PATCH] EFI x86: pass firmware call parameters on the stack

When calling into the EFI firmware, the parameters need to be passed on
the stack. The recent change to use -mregparm=3 breaks x86 EFI support.
This patch is needed to allow the new Intel-based Macs to suspend to ram
(efi.get_time is called during the suspend phase).

Signed-off-by: Frederic Riss <frederic.riss@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/efi.c | 89 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 73 insertions(+), 16 deletions(-)

(limited to 'arch/i386/kernel')

diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index b92c7f0a358..8f9c624ace6 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -472,6 +472,70 @@ static inline void __init check_range_for_systab(efi_memory_desc_t *md)
 	}
 }
 
+/*
+ * Wrap all the virtual calls in a way that forces the parameters on the stack.
+ */
+
+#define efi_call_virt(f, args...) \
+     ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args)
+
+static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+	return efi_call_virt(get_time, tm, tc);
+}
+
+static efi_status_t virt_efi_set_time (efi_time_t *tm)
+{
+	return efi_call_virt(set_time, tm);
+}
+
+static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled,
+					      efi_bool_t *pending,
+					      efi_time_t *tm)
+{
+	return efi_call_virt(get_wakeup_time, enabled, pending, tm);
+}
+
+static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled,
+					      efi_time_t *tm)
+{
+	return efi_call_virt(set_wakeup_time, enabled, tm);
+}
+
+static efi_status_t virt_efi_get_variable (efi_char16_t *name,
+					   efi_guid_t *vendor, u32 *attr,
+					   unsigned long *data_size, void *data)
+{
+	return efi_call_virt(get_variable, name, vendor, attr, data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_variable (unsigned long *name_size,
+						efi_char16_t *name,
+						efi_guid_t *vendor)
+{
+	return efi_call_virt(get_next_variable, name_size, name, vendor);
+}
+
+static efi_status_t virt_efi_set_variable (efi_char16_t *name,
+					   efi_guid_t *vendor,
+					   unsigned long attr,
+					   unsigned long data_size, void *data)
+{
+	return efi_call_virt(set_variable, name, vendor, attr, data_size, data);
+}
+
+static efi_status_t virt_efi_get_next_high_mono_count (u32 *count)
+{
+	return efi_call_virt(get_next_high_mono_count, count);
+}
+
+static void virt_efi_reset_system (int reset_type, efi_status_t status,
+				   unsigned long data_size,
+				   efi_char16_t *data)
+{
+	efi_call_virt(reset_system, reset_type, status, data_size, data);
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -525,22 +589,15 @@ void __init efi_enter_virtual_mode(void)
 	 * pointers in the runtime service table to the new virtual addresses.
 	 */
 
-	efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
-	efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
-	efi.get_wakeup_time = (efi_get_wakeup_time_t *)
-					efi.systab->runtime->get_wakeup_time;
-	efi.set_wakeup_time = (efi_set_wakeup_time_t *)
-					efi.systab->runtime->set_wakeup_time;
-	efi.get_variable = (efi_get_variable_t *)
-					efi.systab->runtime->get_variable;
-	efi.get_next_variable = (efi_get_next_variable_t *)
-					efi.systab->runtime->get_next_variable;
-	efi.set_variable = (efi_set_variable_t *)
-					efi.systab->runtime->set_variable;
-	efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
-					efi.systab->runtime->get_next_high_mono_count;
-	efi.reset_system = (efi_reset_system_t *)
-					efi.systab->runtime->reset_system;
+	efi.get_time = virt_efi_get_time;
+	efi.set_time = virt_efi_set_time;
+	efi.get_wakeup_time = virt_efi_get_wakeup_time;
+	efi.set_wakeup_time = virt_efi_set_wakeup_time;
+	efi.get_variable = virt_efi_get_variable;
+	efi.get_next_variable = virt_efi_get_next_variable;
+	efi.set_variable = virt_efi_set_variable;
+	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
+	efi.reset_system = virt_efi_reset_system;
 }
 
 void __init
-- 
cgit v1.2.3