From 8edc5cc5ec880c96de8e6686fb0d7a5231e91c05 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi
Date: Mon, 12 May 2008 15:43:34 +0200
Subject: x86: remove 6 bank limitation in 64 bit MCE reporting code

Eliminate the 6-bank restriction in the 64-bit MCE reporting code. This
restriction is artificial (due to the static creation of sysfs files); the
32-bit code does not have any such restriction. The change makes it possible
to report the details of machine checks with errors in bank 6 and above on
CPUs that support those banks. Without the patch, machine check errors in
those banks are not reported.

We still have a 128-bank (MCE_EXTENDED_BANK) restriction, instead of the
maximum of 256 banks supported in hardware. That is not changed in the patch
below, as it has a user-level dependency on the mcelog utility, with bank 128
currently being used for thermal reporting.

The patch below also does not create sysfs controls (bankNctl) for banks
higher than 6. That needs some pre-cleanup of the sysfs MCE layout: removal
of the per-CPU sysfs entries for bankNctl, as they are really a global,
system-level control today. That change will follow. This basic change is
critical for reporting the detailed errors in banks higher than 6.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae..f1f3f5e163b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -31,7 +31,7 @@
 #include
 #define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
+#define NR_SYSFS_BANKS 6
 atomic_t mce_entry;
@@ -46,7 +46,7 @@ static int mce_dont_init;
  */
 static int tolerant = 1;
 static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
 static unsigned long notify_user;
 static int rip_msr;
 static int mce_bootlog = -1;
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	barrier();
 	for (i = 0; i < banks; i++) {
-		if (!bank[i])
+		if (i < NR_SYSFS_BANKS && !bank[i])
 			continue;
 		m.misc = 0;
@@ -444,9 +444,10 @@ static void mce_init(void *dummy)
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	banks = cap & 0xff;
-	if (banks > NR_BANKS) {
-		printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
-		banks = NR_BANKS;
+	if (banks > MCE_EXTENDED_BANK) {
+		printk(KERN_INFO "MCE: warning: using only %d banks\n",
+			MCE_EXTENDED_BANK);
+		banks = MCE_EXTENDED_BANK;
 	}
 	/* Use accurate RIP reporting if available. */
 	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +463,7 @@ static void mce_init(void *dummy)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	for (i = 0; i < banks; i++) {
-		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
 		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
 }
@@ -766,7 +767,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
 	} \
 	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-/* TBD should generate these dynamically based on number of available banks */
+/*
+ * TBD should generate these dynamically based on number of available banks.
+ * Have only 6 control banks in /sysfs until then.
+ */ ACCESSOR(bank0ctl,bank[0],mce_restart()) ACCESSOR(bank1ctl,bank[1],mce_restart()) ACCESSOR(bank2ctl,bank[2],mce_restart()) -- cgit v1.2.3 From 205f93288093df69f9ab5f6981aef27b91088b28 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 5 May 2008 17:52:52 -0400 Subject: x86: add new cache descriptor The latest rev of Intel doc AP-485 details a new cache descriptor that we don't yet support. A 6MB 24-way assoc L2 cache. Signed-off-by: Dave Jones Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_cacheinfo.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615dcb14..2c8afafa18e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ -- cgit v1.2.3 From 873b274a41c0cfe58b2eb0a7722424eb367b905b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 13:02:23 -0400 Subject: x86: Add Centaur and Transmeta CPUs to PAT whitelist Unconditionally enable PAT support on Centaur and Transmeta CPUs. All known models that advertise PAT have no known errata. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/addon_cpuid_features.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c2e1ce33c7c..d8b3e4a9d66 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -62,6 +62,9 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) return; break; + case X86_VENDOR_CENTAUR: + case X86_VENDOR_TRANSMETA: + return; } pat_disable(cpu_has_pat ? -- cgit v1.2.3 From bfe4bb1526945e446d2912bef2e1e2cbd2c7349e Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Sat, 17 May 2008 22:48:13 +0200 Subject: x86: janitor work in bugs.c Just moved trailing statements to the next line, removed space before open/close parenthesis, wrapped long lines. Signed-off-by: Miklos Vajna Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 170d2f5523b..1b1c56bb338 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,8 +59,12 @@ static void __init check_fpu(void) return; } -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug.. 
+	 */
	__asm__("fninit\n\t"
		"fldl %1\n\t"
		"fdivl %2\n\t"
@@ -108,10 +112,15 @@ static void __init check_popad(void)
		"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
		: "=&a" (res)
		: "d" (inp)
-		: "ecx", "edi" );
-	/* If this fails, it means that any user program may lock the CPU hard. Too bad. */
-	if (res != 12345678) printk( "Buggy.\n" );
-	else printk( "OK.\n" );
+		: "ecx", "edi");
+	/*
+	 * If this fails, it means that any user program may lock the
+	 * CPU hard. Too bad.
+	 */
+	if (res != 12345678)
+		printk("Buggy.\n");
+	else
+		printk("OK.\n");
 #endif
 }
@@ -137,7 +146,8 @@ static void __init check_config(void)
 * i486+ only features! (WP works in supervisor mode and the
 * new "invlpg" and "bswap" instructions)
 */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
+	defined(CONFIG_X86_BSWAP)
 	if (boot_cpu_data.x86 == 3)
 		panic("Kernel requires i486+ for 'invlpg' and other features");
 #endif
@@ -170,6 +180,7 @@ void __init check_bugs(void)
 	check_fpu();
 	check_hlt();
 	check_popad();
-	init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	init_utsname()->machine[1] =
+		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
 }
-- cgit v1.2.3

From 0da72a4aeb4482c64c1142a2e36b556d13374937 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 30 Apr 2008 20:11:51 +0200
Subject: x86: fix sparse warning in mtrr/generic.c

arch/x86/kernel/cpu/mtrr/generic.c:216:12: warning: symbol 'lo' shadows an earlier one

Signed-off-by: Thomas Gleixner
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mtrr/generic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a4..0625d4158e5 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -213,12 +213,12 @@ void __init get_mtrr_state(void)
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 	if (amd_special_default_mtrr()) {
-		unsigned lo, hi;
+		unsigned low, high;
 		/* TOP_MEM2 */
-		rdmsr(MSR_K8_TOP_MEM2, lo, hi);
-		tom2 = hi;
+		rdmsr(MSR_K8_TOP_MEM2, low, high);
+		tom2 = high;
 		tom2 <<= 32;
-		tom2 |= lo;
+		tom2 |= low;
 		tom2 &= 0xffffff8000000ULL;
 	}
 	if (mtrr_show) {
-- cgit v1.2.3

From 95ffa2438d0e9c48779f0106b1c0eb36165e759c Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 29 Apr 2008 03:52:33 -0700
Subject: x86: mtrr cleanup for converting continuous to discrete layout, v8

Some BIOSes like to use a continuous MTRR layout, and the X driver then
cannot add WB entries for graphics cards when 4G or more RAM is installed.
This patch changes the MTRR layout to discrete.

mtrr_chunk_size= can be used to have a smaller continuous block to hold
holes. The default is 256M; it can be set according to the size of the
graphics card memory.

mtrr_gran_size= can be used to set the smallest MTRR block, to avoid
running out of MTRRs. A usage example follows.

v2: fix -1 for UC checking
v3: default to disabled; enable_mtrr_cleanup is needed to enable this
    feature; skip the var state change warning; remove next_basek in
    range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 alignment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
v8: fix hole_basek calculation caused by removing next_basek; use
    gran_sizek when basek is 0.

[PATCH] x86: fix trimming e820 with MTRR holes needs to be applied right
after this one.
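As an illustration of how the two new options are meant to be combined (the
values here are arbitrary examples to be tuned per machine, not
recommendations; both options take memparse-style K/M/G suffixes), a boot
command line could look like:

    enable_mtrr_cleanup mtrr_chunk_size=128M mtrr_gran_size=32M

Note that the early_param() registration in the patch below spells the
enable option "enble_mtrr_cleanup", so that literal string is what this
version of the patch actually accepts on the command line.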
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/generic.c | 32 ++- arch/x86/kernel/cpu/mtrr/main.c | 467 ++++++++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/mtrr/mtrr.h | 3 + 3 files changed, 488 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0625d4158e5..5aae648600b 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; -static u64 tom2; +u64 mtrr_tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } - if (tom2) { - if (start >= (1ULL<<32) && (end < tom2)) + if (mtrr_tom2) { + if (start >= (1ULL<<32) && (end < mtrr_tom2)) return MTRR_TYPE_WRBACK; } @@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } +/* fill the MSR pair relating to a var range */ +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) +{ + struct mtrr_var_range *vr; + + vr = mtrr_state.var_ranges; + + vr[index].base_lo = base_lo; + vr[index].base_hi = base_hi; + vr[index].mask_lo = mask_lo; + vr[index].mask_hi = mask_hi; +} + static void get_fixed_ranges(mtrr_type * frs) { @@ -216,10 +230,10 @@ void __init get_mtrr_state(void) unsigned low, high; /* TOP_MEM2 */ rdmsr(MSR_K8_TOP_MEM2, low, high); - tom2 = high; - tom2 <<= 32; - tom2 |= low; - tom2 &= 0xffffff8000000ULL; + mtrr_tom2 = high; + mtrr_tom2 <<= 32; + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff8000000ULL; } if (mtrr_show) { int high_width; @@ -251,9 +265,9 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) { + if (mtrr_tom2) { printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - tom2, tom2>>20); + mtrr_tom2, mtrr_tom2>>20); } } mtrr_state_set = 1; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d932..8a6f68b45e3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -609,6 +610,452 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +#ifdef CONFIG_MTRR_SANITIZER + +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT +static int enable_mtrr_cleanup __initdata = 1; +#else +static int enable_mtrr_cleanup __initdata; +#endif + +#else + +static int enable_mtrr_cleanup __initdata = -1; + +#endif + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end, int merge) +{ + int i; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + 
unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + +addit: + /* need to add that */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; + +} +static void __init subtract_range(struct res_range *range, unsigned long start, + unsigned long end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_state { + unsigned long range_startk, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned address_bits; +}; + +static void __init set_var_mtrr( + unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + unsigned address_mask_high; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + address_mask_high = ((1u << (address_bits - 32u)) - 1u); + + base_hi = basek >> 22; + base_lo = basek << 10; + + if (sizek < 4*1024*1024) { + mask_hi = address_mask_high; + mask_lo = ~((sizek << 10) - 1); + } else { + mask_hi = address_mask_high & (~((sizek >> 22) - 1)); + mask_lo = 0; + } + + base_lo |= type; + mask_lo |= 0x800; + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static unsigned int __init range_to_mtrr(unsigned int reg, + unsigned long range_startk, unsigned long range_sizek, + unsigned char type, unsigned address_bits) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); + set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static void __init 
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return; + range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + + while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + range_sizek -= gran_sizek; + if (!range_sizek) + return; + } + state->range_startk = range_basek; + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + if ((range0_sizek == state->range_sizek) || + ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + return; + } + + + range0_sizek -= chunk_sizek; + printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + + range_basek = range0_basek + range0_sizek; + range_sizek = chunk_sizek; + if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + hole_sizek = range_sizek - (state->range_sizek - range0_sizek); + hole_basek = range_basek + range_sizek - hole_sizek; + } else + range_sizek = state->range_sizek - range0_sizek; + + printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + if (hole_sizek) { + printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + } +} + +static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +{ + unsigned long basek, sizek; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) { + range_to_mtrr_with_hole(state, basek); + + state->range_startk = 0; + state->range_sizek = 0; + } + /* Allocate an msr */ + state->range_startk = basek; + state->range_sizek = sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata = (64ULL<<20); + +static int __init 
parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +{ + struct var_mtrr_state var_state; + int i; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + + /* Write the last range */ + range_to_mtrr_with_hole(&var_state, 0); + printk(KERN_INFO "DONE variable MTRRs\n"); + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) + set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); +} + +static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_WRBACK) + continue; + nr_range = add_range(range, nr_range, base, base + size - 1, 1); + } + printk(KERN_INFO "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_UNCACHABLE) + continue; + if (!size) + continue; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_INFO "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_INFO "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + return nr_range; +} + +static int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long i, base, size, def, dummy; + mtrr_type type; + struct res_range range[RANGE_NUM]; + int nr_range; + unsigned long extra_remove_base, extra_remove_size; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + if (mtrr_tom2) { + extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - 
extra_remove_base;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;

 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1176,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1208,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1242,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }

@@ -829,9 +1285,10 @@ static int __init mtrr_init_finialize(void)
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
-		mtrr_state_warn();
-	else {
+	if (use_intel()) {
+		if (enable_mtrr_cleanup < 1)
+			mtrr_state_warn();
+	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
 		 * specific drivers, we use a tricky method to support
 		 * suspend/resume for them.
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2cc77eb6fea..2dc4ec656b2 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+void fill_mtrr_var_range(unsigned int index,
+	u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);

 extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
 #define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)

 extern unsigned int num_var_ranges;
+extern u64 mtrr_tom2;

 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
-- cgit v1.2.3

From 42651f15824d003e8357693ab72c4dbb3e280836 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 29 Apr 2008 01:59:49 -0700
Subject: x86: fix trimming e820 with MTRR holes.

Converting the MTRR layout from continuous to discrete can sometimes run
out of MTRRs. So add gran_sizek to prevent that, by dropping RAM pieces
smaller than gran_sizek.

The previous trimming could only handle the region from highest_pfn (from
the MTRRs) to end_pfn (from e820). When more than 4G of RAM is installed
there will be holes below 4G, so we also need to check that RAM below 4G
is covered well; the intended check is sketched below.
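A minimal standalone sketch of that coverage check (illustrative only, not
the patch code: the patch performs the trimming via real_trim_memory() and
update_e820(), and the range type matches struct res_range above). Given the
MTRR-covered pfn ranges, sorted, non-overlapping, and nr_range >= 1, the RAM
that must be trimmed is the head before the first range, the holes between
ranges, and the tail up to end_pfn:

    /* Illustrative: count pfns in [0, end_pfn) not covered by any range. */
    struct res_range { unsigned long start, end; };     /* inclusive pfns */

    static unsigned long uncovered_pfns(const struct res_range *range,
                                        int nr_range, unsigned long end_pfn)
    {
            unsigned long lost = 0;
            int i;

            if (range[0].start)                         /* head hole */
                    lost += range[0].start;
            for (i = 0; i < nr_range - 1; i++)          /* holes in the middle */
                    if (range[i].end + 1 < range[i + 1].start)
                            lost += range[i + 1].start - (range[i].end + 1);
            if (range[nr_range - 1].end + 1 < end_pfn)  /* top hole */
                    lost += end_pfn - (range[nr_range - 1].end + 1);
            return lost;
    }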
need to be applied after [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v7 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/main.c | 101 +++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8a6f68b45e3..9ab5c16b0d5 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1095,6 +1095,17 @@ int __init amd_special_default_mtrr(void) return 0; } +static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + return update_memory_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -1110,8 +1121,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - u64 trim_start, trim_size; + struct res_range range[RANGE_NUM]; + int nr_range; + u64 total_real_trim_size; + int changed; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: @@ -1123,9 +1139,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - if (amd_special_default_mtrr()) - return 0; - /* Find highest cached pfn */ for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -1145,26 +1158,80 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - if (highest_pfn < end_pfn) { + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? 
*/ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + changed = 0; + total_real_trim_size = 0; + + /* check the top at first */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) { + total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + } + + if (total_real_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %luMB of RAM.\n", - (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = end_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - update_memory_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); + printk(KERN_INFO "update e820 for mtrr -- end_pfn\n"); update_e820(); - return 1; + changed = 1; } - return 0; + total_real_trim_size = 0; + if (range[0].start) + total_real_trim_size += real_trim_memory(0, range[0].start); + + for (i = 0; i < nr_range - 1; i--) { + if (range[i].end + 1 < range[i+1].start) + total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + } + + if (total_real_trim_size) { + printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); + + WARN_ON(1); + + printk(KERN_INFO "update e820 for mtrr -- holes\n"); + update_e820(); + changed = 1; + } + + return changed; } /** -- cgit v1.2.3 From 8a374026c265476b1acfc3c186a66d59ebdb2cda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 20:25:16 -0700 Subject: x86: fix trimming e820 with MTRR holes. 
- fix v2: process hole then end_pfn fix update_memory_range with whole cover comparing Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/main.c | 44 ++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9ab5c16b0d5..5a2a4c14633 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1098,11 +1098,12 @@ int __init amd_special_default_mtrr(void) static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) { u64 trim_start, trim_size; - trim_start = start_pfn; + trim_start = start_pfn; trim_start <<= PAGE_SHIFT; trim_size = limit_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; + return update_memory_range(trim_start, trim_size, E820_RAM, E820_RESERVED); } @@ -1124,7 +1125,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) struct res_range range[RANGE_NUM]; int nr_range; u64 total_real_trim_size; - int changed; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1189,49 +1189,35 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - changed = 0; - total_real_trim_size = 0; - - /* check the top at first */ - i = nr_range - 1; - if (range[i].end + 1 < end_pfn) { - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); - } - - if (total_real_trim_size) { - printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %lluMB of RAM.\n", - total_real_trim_size >> 20); - - WARN_ON(1); - - printk(KERN_INFO "update e820 for mtrr -- end_pfn\n"); - update_e820(); - changed = 1; - } - total_real_trim_size = 0; + /* check the head */ if (range[0].start) total_real_trim_size += real_trim_memory(0, range[0].start); - - for (i = 0; i < nr_range - 1; i--) { + /* check the holes */ + for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); } + /* check the top */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) + total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); if (total_real_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" " all of memory, losing %lluMB of RAM.\n", total_real_trim_size >> 20); - WARN_ON(1); + if (enable_mtrr_cleanup < 1) + WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr -- holes\n"); + printk(KERN_INFO "update e820 for mtrr\n"); update_e820(); - changed = 1; + + return 1; } - return changed; + return 0; } /** -- cgit v1.2.3 From f5098d62c1d1cede8ff23d01bbf50a421f110562 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 20:25:58 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout v8 - fix v9: address format change requests by Ingo more case handling in range_to_var_with_hole Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/main.c | 164 ++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 74 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5a2a4c14633..79597d3c343 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_driver = { }; #ifdef 
CONFIG_MTRR_SANITIZER - -#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT -static int enable_mtrr_cleanup __initdata = 1; -#else -static int enable_mtrr_cleanup __initdata; -#endif - +static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; #else - static int enable_mtrr_cleanup __initdata = -1; - #endif static int __init disable_mtrr_cleanup_setup(char *str) @@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +/* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 struct res_range { @@ -647,13 +640,27 @@ struct res_range { unsigned long end; }; -static int __init add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end, int merge) +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) { - int i; + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; - if (!merge) - goto addit; + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; /* try to merge it with old one */ for (i = 0; i < nr_range; i++) { @@ -676,24 +683,14 @@ static int __init add_range(struct res_range *range, int nr_range, unsigned long return nr_range; } -addit: /* need to add that */ - if (nr_range >= RANGE_NUM) - return nr_range; - - range[nr_range].start = start; - range[nr_range].end = end; - - nr_range++; - - return nr_range; - + return add_range(range, nr_range, start, end); } -static void __init subtract_range(struct res_range *range, unsigned long start, - unsigned long end) + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) { - int i; - int j; + int i, j; for (j = 0; j < RANGE_NUM; j++) { if (!range[j].end) @@ -747,46 +744,47 @@ static int __init cmp_range(const void *x1, const void *x2) } struct var_mtrr_state { - unsigned long range_startk, range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; - unsigned address_bits; + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned int address_bits; }; -static void __init set_var_mtrr( - unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; - unsigned address_mask_high; + u64 base, mask; if (!sizek) { fill_mtrr_var_range(reg, 0, 0, 0, 0); return; } - address_mask_high = ((1u << (address_bits - 32u)) - 1u); + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); - base_hi = basek >> 22; - base_lo = basek << 10; + base = ((u64)basek) << 10; - if (sizek < 4*1024*1024) { - mask_hi = address_mask_high; - mask_lo = ~((sizek << 10) - 1); - } else { - mask_hi = address_mask_high & (~((sizek >> 22) - 1)); - mask_lo = 0; - } + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; - base_lo |= type; - mask_lo |= 0x800; fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } -static unsigned int __init range_to_mtrr(unsigned int reg, - unsigned long 
range_startk, unsigned long range_sizek, - unsigned char type, unsigned address_bits) +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type, + unsigned address_bits) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -794,6 +792,7 @@ static unsigned int __init range_to_mtrr(unsigned int reg, while (range_sizek) { unsigned long max_align, align; unsigned long sizek; + /* Compute the maximum size I can make a range */ if (range_startk) max_align = ffs(range_startk) - 1; @@ -818,7 +817,8 @@ static unsigned int __init range_to_mtrr(unsigned int reg, return reg; } -static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static void __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) { unsigned long hole_basek, hole_sizek; unsigned long range0_basek, range0_sizek; @@ -848,23 +848,31 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - if ((range0_sizek == state->range_sizek) || - ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + if (range0_sizek == state->range_sizek) { printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); return; + } else if (basek) { + while (range0_basek + range0_sizek - chunk_sizek > basek) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } - range0_sizek -= chunk_sizek; + if (range0_sizek > chunk_sizek) + range0_sizek -= chunk_sizek; printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + + if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && + (range_basek + range_sizek <= basek)) { hole_sizek = range_sizek - (state->range_sizek - range0_sizek); hole_basek = range_basek + range_sizek - hole_sizek; } else @@ -880,7 +888,9 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne } } -static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) { unsigned long basek, sizek; @@ -921,7 +931,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (64ULL<<20); +static u64 mtrr_gran_size __initdata = (1ULL<<20); static int __init parse_mtrr_gran_size_opt(char *p) { @@ -932,17 +942,19 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +static void __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + unsigned address_bits) { struct var_mtrr_state var_state; int i; - var_state.range_startk = 0; - var_state.range_sizek = 0; - var_state.reg = 
0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; /* Write the range etc */ for (i = 0; i < nr_range; i++) @@ -952,11 +964,16 @@ static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, un range_to_mtrr_with_hole(&var_state, 0); printk(KERN_INFO "DONE variable MTRRs\n"); /* Clear out the extra MTRR's */ - while (var_state.reg < num_var_ranges) - set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); + while (var_state.reg < num_var_ranges) { + set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + var_state.reg++; + } } -static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) { unsigned long i, base, size; mtrr_type type; @@ -965,7 +982,7 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, mtrr_if->get(i, &base, &size, &type); if (type != MTRR_TYPE_WRBACK) continue; - nr_range = add_range(range, nr_range, base, base + size - 1, 1); + nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); } printk(KERN_INFO "After WB checking\n"); for (i = 0; i < nr_range; i++) @@ -1005,11 +1022,11 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, static int __init mtrr_cleanup(unsigned address_bits) { + unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - mtrr_type type; struct res_range range[RANGE_NUM]; + mtrr_type type; int nr_range; - unsigned long extra_remove_base, extra_remove_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1053,7 +1070,6 @@ static int __init mtrr_cleanup(unsigned address_bits) x86_setup_var_mtrrs(range, nr_range, address_bits); return 1; - } static int disable_mtrr_trim; -- cgit v1.2.3 From 12031a624af7816ec7660b82be648aa3703b4ebe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 2 May 2008 02:40:22 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete - auto detect v4 Loop through mtrr chunk_size and gran_size from 1M to 2G to find out the optimal value so user does not need to add mtrr_chunk_size and mtrr_gran_size to the kernel command line. If optimal value is not found, print out all list to help select less optimal value. Add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries. 
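As a cross-check of the size of the candidate table this search needs (see
NUM_RESULT further down in the diff): gran_size takes the 12 power-of-two
values from 1M (2^20) to 2G (2^31), and for gran_size = 2^e the chunk_size
runs over the powers of two from gran_size up to 4G (2^32), i.e. 33 - e
values. The total number of (gran_size, chunk_size) pairs is therefore
13 + 12 + ... + 2 = (2 + 13) * 12 / 2 = 90, which is what the "(2+13)*6"
comment in the patch encodes. A trivial standalone counter confirming this
(illustrative only; it mirrors the loop bounds used in mtrr_cleanup() below):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long gran, chunk;
            int n = 0;

            /* same bounds as the search loop: gran 1M..2G, chunk gran..4G */
            for (gran = 1ULL << 20; gran < (1ULL << 32); gran <<= 1)
                    for (chunk = gran; chunk < (1ULL << 33); chunk <<= 1)
                            n++;
            printf("%d\n", n);      /* prints 90, matching NUM_RESULT */
            return 0;
    }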
v2: find the one with more spare entries v3: fix hole_basek offset v4: tight the compare between range and range_new loop stop with 4g Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Gabriel C Cc: Mika Fischer Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 610 ++++++++++++++++++++++++++++++---------- 1 file changed, 464 insertions(+), 146 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79597d3c343..e6c162c379a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; -#ifdef CONFIG_MTRR_SANITIZER -static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; -#else -static int enable_mtrr_cleanup __initdata = -1; -#endif - -static int __init disable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; - return 0; -} -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); - -static int __init enable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; - return 0; -} -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); - /* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 @@ -702,13 +680,15 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) continue; } - if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { range[j].start = end + 1; continue; } - if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { range[j].end = start - 1; continue; } @@ -743,18 +723,123 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* sort the 
ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; unsigned long chunk_sizek; unsigned long gran_sizek; unsigned int reg; - unsigned int address_bits; }; static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) + unsigned char type, unsigned int address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -781,10 +866,34 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, - unsigned long range_sizek, unsigned char type, - unsigned address_bits) + unsigned long range_sizek, unsigned char type) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -803,12 +912,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, " + "range: %ldMB, type %s\n", reg, range_startk >> 10, sizek >> 10, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); - set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; if (reg >= num_var_ranges) @@ -817,10 +927,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, return reg; } -static void __init -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static unsigned __init 
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) { unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; unsigned long range0_basek, range0_sizek; unsigned long range_basek, range_sizek; unsigned long chunk_sizek; @@ -828,64 +940,95 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) hole_basek = 0; hole_sizek = 0; + second_basek = 0; + second_sizek = 0; chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; /* align with gran size, prevent small block used up MTRRs */ range_basek = ALIGN(state->range_startk, gran_sizek); if ((range_basek > basek) && basek) - return; - range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); - while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + while (range_sizek > state->range_sizek) { range_sizek -= gran_sizek; if (!range_sizek) - return; + return 0; } - state->range_startk = range_basek; state->range_sizek = range_sizek; /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); if (range0_sizek == state->range_sizek) { - printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); - return; - } else if (basek) { - while (range0_basek + range0_sizek - chunk_sizek > basek) { + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + range0_sizek -= chunk_sizek; + if (range0_sizek && sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { range0_sizek -= chunk_sizek; if (!range0_sizek) break; } } + if (range0_sizek) { + printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); - if (range0_sizek > chunk_sizek) - range0_sizek -= chunk_sizek; - printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + } range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && - (range_basek + range_sizek <= basek)) { - hole_sizek = range_sizek - (state->range_sizek - range0_sizek); - hole_basek = range_basek + range_sizek - hole_sizek; - } else + if (range_basek + range_sizek > basek && + range_basek + range_sizek <= (basek + sizek)) { + /* one hole */ + second_basek = basek; + second_sizek = range_basek + range_sizek - basek; + } + + /* if last piece, only could one hole near end */ + if ((second_basek || !basek) && + range_sizek - (state->range_sizek - range0_sizek) - second_sizek < + (chunk_sizek >> 1)) { + /* + * one hole in middle (second_sizek is 0) or at end + * (second_sizek is 0 ) + */ + hole_sizek = range_sizek - (state->range_sizek - range0_sizek) + - second_sizek; + hole_basek = range_basek + range_sizek - hole_sizek + - second_sizek; + } else { + /* fallback for big 
hole, or several holes */ range_sizek = state->range_sizek - range0_sizek; + second_basek = 0; + second_sizek = 0; + } - printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + MTRR_TYPE_WRBACK); if (hole_sizek) { - printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + + return second_sizek; } static void __init @@ -893,6 +1036,7 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) { unsigned long basek, sizek; + unsigned long second_sizek = 0; if (state->reg >= num_var_ranges) return; @@ -901,21 +1045,19 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, sizek = size_pfn << (PAGE_SHIFT - 10); /* See if I can merge with the last range */ - if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { unsigned long endk = basek + sizek; state->range_sizek = endk - state->range_startk; return; } /* Write the range mtrrs */ - if (state->range_sizek != 0) { - range_to_mtrr_with_hole(state, basek); + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - state->range_startk = 0; - state->range_sizek = 0; - } /* Allocate an msr */ - state->range_startk = basek; - state->range_sizek = sizek; + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; } /* mininum size of mtrr block that can take hole */ @@ -931,7 +1073,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (1ULL<<20); +static u64 mtrr_gran_size __initdata; static int __init parse_mtrr_gran_size_opt(char *p) { @@ -942,91 +1084,84 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, - unsigned address_bits) + u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; int i; + int num_reg; var_state.range_startk = 0; var_state.range_sizek = 0; var_state.reg = 0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); /* Write the range etc */ for (i = 0; i < nr_range; i++) - set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + 
set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); /* Write the last range */ - range_to_mtrr_with_hole(&var_state, 0); - printk(KERN_INFO "DONE variable MTRRs\n"); + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + printk(KERN_DEBUG "DONE variable MTRRs\n"); + + num_reg = var_state.reg; /* Clear out the extra MTRR's */ while (var_state.reg < num_var_ranges) { - set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + save_var_mtrr(var_state.reg, 0, 0, 0); var_state.reg++; } -} - -static int __init -x86_get_mtrr_mem_range(struct res_range *range, int nr_range, - unsigned long extra_remove_base, - unsigned long extra_remove_size) -{ - unsigned long i, base, size; - mtrr_type type; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_WRBACK) - continue; - nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); - } - printk(KERN_INFO "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); - /* take out UC ranges */ - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_UNCACHABLE) - continue; - if (!size) - continue; - subtract_range(range, base, base + size - 1); - } - if (extra_remove_size) - subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + return num_reg; +} - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } - printk(KERN_INFO "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; - /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - printk(KERN_INFO "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +/* + * gran_size: 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 4G + * so we need (2+13)*6 + */ +#define NUM_RESULT 90 +#define PSHIFT (PAGE_SHIFT - 10) - return nr_range; -} +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static struct res_range __initdata range_new[RANGE_NUM]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - struct res_range range[RANGE_NUM]; mtrr_type type; - int nr_range; + int nr_range, nr_range_new; + u64 chunk_size, gran_size; + unsigned long range_sums, range_sums_new; + int index_good; + int num_reg_good; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1038,10 +1173,20 @@ static int __init mtrr_cleanup(unsigned address_bits) if (def != MTRR_TYPE_UNCACHABLE) return 0; + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; + size = 
range_state[i].size_pfn; if (type >= MTRR_NUM_TYPES) continue; if (!size) @@ -1062,15 +1207,172 @@ static int __init mtrr_cleanup(unsigned address_bits) extra_remove_size = 0; if (mtrr_tom2) { extra_remove_base = 1 << (32 - PAGE_SHIFT); - extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base; + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + int num_reg; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, + mtrr_gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, + extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + i = 0; + result[i].chunk_sizek = mtrr_chunk_size >> 10; + result[i].gran_sizek = mtrr_gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?" BAD ":"", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + memset(result, 0, sizeof(result[0])); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (chunk_size = gran_size; chunk_size < (1ULL<<33); + chunk_size <<= 1) { + int num_reg; + + printk(KERN_INFO + "\ngran_size: %lldM chunk_size_size: %lldM\n", + gran_size >> 20, chunk_size >> 20); + if (i >= NUM_RESULT) + continue; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } + i++; + } } - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); - /* convert ranges to var ranges state */ - x86_setup_var_mtrrs(range, nr_range, address_bits); + /* print out all */ 
+ for (i = 0; i < NUM_RESULT; i++) { + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + } - return 1; + /* try to find the optimal index */ + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) { + if (!min_loss_pfn[i]) { + num_reg_good = i; + break; + } + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n", + result[i].num_reg, + result[i].lose_cover_sizek >> 10); + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + return 1; + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; } +#else +static int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int __initdata changed_by_mtrr_cleanup; static int disable_mtrr_trim; @@ -1111,7 +1413,8 @@ int __init amd_special_default_mtrr(void) return 0; } -static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) { u64 trim_start, trim_size; trim_start = start_pfn; @@ -1138,9 +1441,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - struct res_range range[RANGE_NUM]; int nr_range; - u64 total_real_trim_size; + u64 total_trim_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1155,11 +1457,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* Find highest cached pfn */ + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; if (highest_pfn < base + size) highest_pfn = base + size; } @@ -1177,9 +1490,10 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; if (type >= MTRR_NUM_TYPES) continue; + size = range_state[i].size_pfn; if (!size) type = MTRR_NUM_TYPES; num[type]++; @@ -1205,26 +1519,28 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } 
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - total_real_trim_size = 0; + total_trim_size = 0; /* check the head */ if (range[0].start) - total_real_trim_size += real_trim_memory(0, range[0].start); + total_trim_size += real_trim_memory(0, range[0].start); /* check the holes */ for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) - total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); } /* check the top */ i = nr_range - 1; if (range[i].end + 1 < end_pfn) - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); - if (total_real_trim_size) { + if (total_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" " all of memory, losing %lluMB of RAM.\n", - total_real_trim_size >> 20); + total_trim_size >> 20); - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); @@ -1314,8 +1630,10 @@ void __init mtrr_bp_init(void) if (use_intel()) { get_mtrr_state(); - if (mtrr_cleanup(phys_addr)) + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); + } } } @@ -1355,7 +1673,7 @@ static int __init mtrr_init_finialize(void) if (!mtrr_if) return 0; if (use_intel()) { - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) mtrr_state_warn(); } else { /* The CPUs haven't MTRR and seem to not support SMP. They have -- cgit v1.2.3 From 833e78bfeeef628f0201349a0a05a54f48f07884 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 5 May 2008 15:57:38 -0700 Subject: x86: process fam 10h like k8 with fixed mtrr setting otherwise fixed MTRR for family 10h may not be changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5aae648600b..a83f5cd7888 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -342,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) if (lo != msrwords[0] || hi != msrwords[1]) { if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && + (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); -- cgit v1.2.3 From 8004dd965b13b01a96def054d420f6df7ff22d53 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 May 2008 17:40:39 -0700 Subject: x86: amd opteron TOM2 mask val fix there is a typo in the mask value, need to remove that extra 0, to avoid 4bit clearing. 
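As a quick illustration of the one-digit slip (a stand-alone user-space sketch, not part of the patch; the TOM2 value below is made up): the extra 0 shifts the whole mask up by four bits, so bits 26-23 of the TOM2 address get cleared:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t wrong = 0xffffff8000000ULL; /* 13 hex digits: keeps bits 51..27 */
	uint64_t right = 0xffffff800000ULL;  /* 12 hex digits: keeps bits 47..23 */
	uint64_t tom2  = 0x13f800000ULL;     /* hypothetical TOM2 near 5GB, 8MB aligned */

	printf("buggy mask: %#llx\n", (unsigned long long)(tom2 & wrong));
	printf("fixed mask: %#llx\n", (unsigned long long)(tom2 & right));
	return 0;
}

With the buggy mask this example drops 0x07800000 (120MB) from the top-of-memory value; with the fixed mask TOM2 is preserved intact.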
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index a83f5cd7888..509bd3d9eac 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -233,7 +233,7 @@ void __init get_mtrr_state(void) mtrr_tom2 = high; mtrr_tom2 <<= 32; mtrr_tom2 |= low; - mtrr_tom2 &= 0xffffff8000000ULL; + mtrr_tom2 &= 0xffffff800000ULL; } if (mtrr_show) { int high_width; -- cgit v1.2.3 From 3f03c54a34fa3da680b3341dd3ba965ef3394bd1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 9 May 2008 22:40:52 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout - fix #2 Disable the noisy print out. Also prefer the setting that leaves fewer spare MTRR registers. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 79 +++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index e6c162c379a..0642201784e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -730,6 +730,7 @@ struct var_mtrr_range_state { }; struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static int __initdata debug_print; static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, @@ -748,10 +749,12 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); } - printk(KERN_DEBUG "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + if (debug_print) { + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { @@ -775,17 +778,21 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, continue; nr_range++; } - printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", - range[i].start, range[i].end + 1); + if (debug_print) { + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } /* sort the ranges */ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - printk(KERN_DEBUG "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + if (debug_print) { + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } /* clear those is not used */ for (i = nr_range; i < RANGE_NUM; i++) @@ -912,12 +919,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, " - "range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, - (type == MTRR_TYPE_UNCACHABLE)?"UC": - ((type == MTRR_TYPE_WRBACK)?"WB":"Other") - ); + if (debug_print) + printk(KERN_DEBUG "Setting variable MTRR %d, " + "base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC":
+ ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -963,7 +971,9 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); if (range0_sizek == state->range_sizek) { - printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10, + if (debug_print) + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", + range0_basek<<10, (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK); @@ -980,7 +990,9 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } if (range0_sizek) { - printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10, + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK); @@ -1016,13 +1028,15 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_sizek = 0; } - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, MTRR_TYPE_WRBACK); if (hole_sizek) { - printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10, - (hole_basek + hole_sizek)<<10); + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, (hole_basek + hole_sizek)<<10); state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, MTRR_TYPE_UNCACHABLE); @@ -1120,7 +1134,6 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, /* Write the last range */ if (var_state.range_sizek != 0) range_to_mtrr_with_hole(&var_state, 0, 0); - printk(KERN_DEBUG "DONE variable MTRRs\n"); num_reg = var_state.reg; /* Clear out the extra MTRR's */ @@ -1219,6 +1232,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + debug_print = 1; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1242,8 +1256,8 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?" 
BAD ":"", result[i].gran_sizek >> 10, + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", result[i].num_reg, result[i].bad?"-":"", @@ -1254,6 +1268,7 @@ static int __init mtrr_cleanup(unsigned address_bits) } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); + debug_print = 0; memset(result, 0, sizeof(result[0])); } @@ -1265,9 +1280,10 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 1) { int num_reg; - printk(KERN_INFO + if (debug_print) + printk(KERN_INFO "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + gran_size >> 20, chunk_size >> 20); if (i >= NUM_RESULT) continue; @@ -1310,10 +1326,10 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", result[i].num_reg, result[i].bad?"-":"", result[i].lose_cover_sizek >> 10); } @@ -1322,7 +1338,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (nr_mtrr_spare_reg >= num_var_ranges) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; - for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) { + for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { if (!min_loss_pfn[i]) { num_reg_good = i; break; @@ -1344,10 +1360,10 @@ static int __init mtrr_cleanup(unsigned address_bits) if (index_good != -1) { printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n", + printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", result[i].num_reg, result[i].lose_cover_sizek >> 10); /* convert ranges to var ranges state */ @@ -1355,6 +1371,7 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; + debug_print = 1; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); set_var_mtrr_all(address_bits); return 1; -- cgit v1.2.3 From 4d285878564bb46cf64e54be18eeffe33ca583a0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:48:32 -0400 Subject: x86: Move the AMD64 specific parts out of setup_64.c Create a separate amd_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/amd_64.c | 235 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 arch/x86/kernel/cpu/amd_64.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c6f819088..ef065c1a2e1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,6 +7,7 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 00000000000..1746f6f9572 --- /dev/null +++ b/arch/x86/kernel/cpu/amd_64.c @@ -0,0 +1,235 @@ +#include +#include + +#include +#include +#include + +#include + +extern int __cpuinit get_model_name(struct cpuinfo_x86 *c); +extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c); + +int force_mwait __cpuinitdata; + +#ifdef CONFIG_NUMA +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; + +#ifdef CONFIG_NUMA + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? 
*/ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; + +#endif +} + +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} + +void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_cpu_cap(c, 0*32+31); + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || + level >= 0x0f58)) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + level = get_model_name(c); + if (!level) { + switch (c->x86) { + case 15: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + fam10h_check_enable_mmcfg(); + + if (amd_apic_timer_broken()) + disable_apic_timer = 1; + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. 
+ */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && + (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + set_memory_4k((unsigned long)__va(tseg), 1); + } +} -- cgit v1.2.3 From a82fbe31cb387bb246e2d3b3c177f551bb991135 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:54:32 -0400 Subject: x86: Move the 64-bit Intel specific parts out of setup_64.c Create a separate intel_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/intel_64.c | 97 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 arch/x86/kernel/cpu/intel_64.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ef065c1a2e1..b7a11924fed 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_64) += intel_64.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 00000000000..e5f929f6c3d --- /dev/null +++ b/arch/x86/kernel/cpu/intel_64.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. 
*/ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + } + + + if (cpu_has_bts) + ds_init_intel(c); + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + /* CPUID workaround for Intel 0F34 CPU */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 0xF && c->x86_model == 0x3 && + c->x86_mask == 0x4) + c->x86_phys_bits = 36; + } + + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +} -- cgit v1.2.3 From 7e2191127eb414d7d5a11df6552ab6e3845d17a1 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:55:06 -0400 Subject: x86: Remove workaround for prescott (32bit P4) from 64-bit code. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel_64.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index e5f929f6c3d..b3391219948 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -79,11 +79,6 @@ void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; - /* CPUID workaround for Intel 0F34 CPU */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 0xF && c->x86_model == 0x3 && - c->x86_mask == 0x4) - c->x86_phys_bits = 36; } if (c->x86 == 15) -- cgit v1.2.3 From 30a713180b3d08fdec5ca572e5a1cd35253c5d8e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:57:25 -0400 Subject: x86: Move the 64-bit Centaur specific parts out of setup_64.c Create a separate centaur_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/centaur_64.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 arch/x86/kernel/cpu/centaur_64.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b7a11924fed..c77a1c50d94 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_64) += centaur_64.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o obj-$(CONFIG_X86_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c new file mode 100644 index 00000000000..bac96d187d0 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -0,0 +1,31 @@ +#include +#include + +#include +#include + +void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x6 && c->x86_model >= 0xf) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +} -- cgit v1.2.3 From 1c47cd638e8302bc38be1f6d81067950e038ebd3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 May 2008 15:42:45 -0700 Subject: x86: fix overlong line in arch/x86/kernel/cpu/amd_64.c Clean up an overlong line in arch/x86/kernel/cpu/amd_64.c. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1746f6f9572..c815c2c0484 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -229,7 +229,8 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + (tseg >> PMD_SHIFT) < + (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) set_memory_4k((unsigned long)__va(tseg), 1); } } -- cgit v1.2.3 From 831d991821daedd4839073dbca55514432ef1768 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 3 Sep 2007 10:17:39 +0200 Subject: x86: add PCI extended config space access for AMD Barcelona This patch implements PCI extended configuration space access for AMD's Barcelona CPUs. It extends the method using CF8/CFC IO addresses. An x86 capability bit has been introduced that is set for CPUs supporting PCI extended config space accesses. 
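For reference, a sketch of the extended type-1 configuration address this enables (illustration only, following the usual conf1 scheme; conf1_ext_address is a made-up name, not the patch's code). The register offset grows from 8 to 12 bits, and offset bits 11:8 are folded into bits 27:24 of the dword written to port 0xCF8, so the extended space stays reachable through the legacy CF8/CFC ports:

/*
 * Sketch: extended type-1 config address. 'reg' may be up to 0xFFF
 * instead of the legacy 0xFF; its high nibble lands in bits 27:24.
 */
static inline u32 conf1_ext_address(unsigned int bus, unsigned int devfn,
				    unsigned int reg)
{
	return 0x80000000 | ((reg & 0xF00) << 16) |
		(bus << 16) | (devfn << 8) | (reg & 0xFC);
}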
Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++++ arch/x86/kernel/cpu/amd_64.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 24586682829..99221f9834e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -6,6 +6,7 @@ #include #include +#include "../setup.h" #include "cpu.h" /* @@ -308,6 +309,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c815c2c0484..180097e9921 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -217,6 +217,9 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) fam10h_check_enable_mmcfg(); + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); + if (amd_apic_timer_broken()) disable_apic_timer = 1; -- cgit v1.2.3 From 1a5726528a70bb239bdd149aef7f2155cd2b1699 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 12:21:36 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" --- arch/x86/kernel/cpu/amd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 99221f9834e..656b40aed64 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "../setup.h" -- cgit v1.2.3 From d44b9d17faf7bca165ce73a1acb936b65a3f0cc6 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 3 Jun 2008 13:06:07 -0700 Subject: x86: move bugs_64.c to cpu/bugs_64.c It looks good to move bugs_64.c to cpu/bugs_64.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/bugs_64.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 arch/x86/kernel/cpu/bugs_64.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c77a1c50d94..65b1be5fe9c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -6,6 +6,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c new file mode 100644 index 00000000000..9a3ed0649d4 --- /dev/null +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -0,0 +1,33 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * Copyright (C) 2000 SuSE + */ + +#include +#include +#include +#include +#include +#include +#include + +void __init check_bugs(void) +{ + identify_boot_cpu(); +#if !defined(CONFIG_SMP) + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. 
+ */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); +} -- cgit v1.2.3 From 1a1b1d1322ebd1ece405f3057cdd408bc77e391d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 4 Jun 2008 01:00:58 +0400 Subject: x86: watchdog - check for CPU is being supported This patch checks that the CPU has been recognized before calling unreserve(). Since enable_lapic_nmi_watchdog() already has such a check, the same makes sense here too for the sake of code consistency (but nothing more). Signed-off-by: Cyrill Gorcunov Cc: mingo@redhat.com Cc: hpa@zytor.com Cc: macro@linux-mips.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/perfctr-watchdog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f9ae93adffe..ddda4b64f54 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -181,7 +181,9 @@ void disable_lapic_nmi_watchdog(void) return; on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wd_ops->unreserve(); + + if (wd_ops) + wd_ops->unreserve(); BUG_ON(atomic_read(&nmi_active) != 0); } -- cgit v1.2.3 From 9e26d84273541a8c6c2efb705457ca8e6245fb73 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 6 Jun 2008 12:01:13 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" Also much less code now. Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 2 -- arch/x86/kernel/cpu/amd_64.c | 1 + arch/x86/kernel/cpu/cpu.h | 5 +++++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 656b40aed64..a38d54f4ff2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,10 +4,8 @@ #include #include #include -#include #include -#include "../setup.h" #include "cpu.h" /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 180097e9921..626fc21f027 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -6,6 +6,7 @@ #include #include +#include "cpu.h" extern int __cpuinit get_model_name(struct cpuinfo_x86 *c); extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 783691b2a73..f5d5bb1b554 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,3 +1,4 @@ +#ifdef CONFIG_X86_32 struct cpu_model_info { int vendor; @@ -36,3 +37,7 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); + +#endif /* CONFIG_X86_32 */ + +extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); -- cgit v1.2.3 From aa83f3f2cfc74d66d01b1d2eb1485ea1103a0f4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jun 2008 17:11:13 +0200 Subject: x86: cleanup C1E enabled detection Rename the "MSR_K8_ENABLE_C1E" MSR to INT_PENDING_MSG, which is the name in the data sheet as well. Move the C1E mask to the header file.
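In code terms, the test ends up looking like this (a minimal sketch; c1e_active() is a made-up helper, but the names it uses are the ones this patch introduces, with K8_INTP_C1E_ACTIVE_MASK carrying the old 0x18000000 value next to MSR_K8_INT_PENDING_MSG in the header):

/* Sketch: is C1E active on this core? */
static int c1e_active(void)
{
	u32 lo, hi;

	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
	return !!(lo & K8_INTP_C1E_ACTIVE_MASK);
}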
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 5 ++--- arch/x86/kernel/cpu/amd_64.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a38d54f4ff2..30b5055be35 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -25,7 +25,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); #ifdef CONFIG_X86_LOCAL_APIC -#define ENABLE_C1E_MASK 0x18000000 #define CPUID_PROCESSOR_SIGNATURE 1 #define CPUID_XFAM 0x0ff00000 #define CPUID_XFAM_K8 0x00000000 @@ -45,8 +44,8 @@ static __cpuinit int amd_apic_timer_broken(void) break; case CPUID_XFAM_10H: case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) { + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { if (smp_processor_id() != boot_cpu_physical_apicid) printk(KERN_INFO "AMD C1E detected late. " " Force timer broadcast.\n"); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 626fc21f027..6eef3c79d15 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -110,7 +110,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } -#define ENABLE_C1E_MASK 0x18000000 #define CPUID_PROCESSOR_SIGNATURE 1 #define CPUID_XFAM 0x0ff00000 #define CPUID_XFAM_K8 0x00000000 @@ -130,8 +129,8 @@ static __cpuinit int amd_apic_timer_broken(void) break; case CPUID_XFAM_10H: case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) return 1; break; default: -- cgit v1.2.3 From 732d7be17b98ebfd59e5864c3490f19856fa832c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jun 2008 17:27:20 +0200 Subject: x86: use cpuinfo to check for interrupt pending message msr Simplify code: no need to do a cpuid(1) again. The cpuinfo structure has all necessary information already. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 41 +++++++++++++++-------------------------- arch/x86/kernel/cpu/amd_64.c | 38 +++++++++++++++----------------------- 2 files changed, 30 insertions(+), 49 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 30b5055be35..e76b49e7a91 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -25,35 +25,24 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); #ifdef CONFIG_X86_LOCAL_APIC -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) +static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) { u32 lo, hi; - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. 
" - " Force timer broadcast.\n"); - return 1; - } - break; - default: - /* err on the side of caution */ + + if (c->x86 < 0x0F) + return 0; + + /* Family 0x0f models < rev F do not have this MSR */ + if (c->x86 == 0x0f && c->x86_model < 0x40) + return 0; + + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { + if (smp_processor_id() != boot_cpu_physical_apicid) + printk(KERN_INFO "AMD C1E detected late. " + "Force timer broadcast.\n"); return 1; } return 0; @@ -297,7 +286,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken()) + if (amd_apic_timer_broken(c)) local_apic_timer_disabled = 1; #endif diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 6eef3c79d15..f5fc161d8f2 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -110,31 +110,23 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) +static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) { - u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + u32 lo, hi; - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) - return 1; - break; - default: - /* err on the side of caution */ + if (c->x86 < 0x0F) + return 0; + + /* Family 0x0f models < rev F do not have this MSR */ + if (c->x86 == 0x0f && c->x86_model < 0x40) + return 0; + + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { + if (smp_processor_id() != boot_cpu_physical_apicid) + printk(KERN_INFO "AMD C1E detected late. " + "Force timer broadcast.\n"); return 1; } return 0; @@ -220,7 +212,7 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) amd_enable_pci_ext_cfg(c); - if (amd_apic_timer_broken()) + if (amd_apic_timer_broken(c)) disable_apic_timer = 1; if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { -- cgit v1.2.3 From ee863ba7ab3d3ed8a9585d378aae69d1e3e9f1b4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Jun 2008 16:04:30 +0200 Subject: x86: unconditionally enable PAT for AMD CPUs If PAT support is advertised it should just work. No errata known. 
Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index d8b3e4a9d66..0fbd06241e0 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -54,14 +54,11 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { - case X86_VENDOR_AMD: - if (c->x86 >= 0xf && c->x86 <= 0x11) - return; - break; case X86_VENDOR_INTEL: if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) return; break; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: return; -- cgit v1.2.3 From 97cfab6ac4ddfda0d722393bbf46cc40bc332107 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Jun 2008 16:05:18 +0200 Subject: x86: PAT: fix ambiguous paranoia check in pat_init() Starting with commit 8d4a4300854f3971502e81dacd930704cb88f606 (x86: cleanup PAT cpu validation) the PAT CPU feature flag is not cleared anymore. Now the error message "PAT enabled, but CPU feature cleared" in pat_init() is misleading. Furthermore the current code does not check for existence of the PAT CPU feature flag if a CPU is whitelisted in validate_pat_support. This patch clears pat_wc_enabled if boot CPU has no PAT feature flag and adapts the paranoia check. Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 0fbd06241e0..2df461f06a5 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -53,6 +53,9 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { + if (!cpu_has_pat) + pat_disable("PAT not supported by CPU."); + switch (c->x86_vendor) { case X86_VENDOR_INTEL: if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) @@ -64,8 +67,6 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) return; } - pat_disable(cpu_has_pat ? - "PAT disabled. Not yet verified on this CPU type." : - "PAT not supported by CPU."); + pat_disable("PAT disabled. Not yet verified on this CPU type."); } #endif -- cgit v1.2.3 From cd7a4e936d345ab4cb49d68192d90bd4e4c58458 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Jun 2008 16:05:39 +0200 Subject: x86: PAT: fixed checkpatch errors (and whitespaces) x86: PAT: fixed checkpatch errors (and whitespaces) Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 2df461f06a5..84a8220a607 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -1,9 +1,7 @@ - /* * Routines to indentify additional cpu features that are scattered in * cpuid space. 
*/ - #include #include -- cgit v1.2.3 From b4b3bd96f26586e53ab5482f1869221dd1b5ac36 Mon Sep 17 00:00:00 2001 From: Daniel Rahn Date: Fri, 6 Jun 2008 09:42:36 +0200 Subject: x86: correctly report NR_BANKS in mce_64.c attached is a no-brainer that makes kernel correctly report NR_BANKS for MCE. We are right now limited to NR_BANKS==6, but the error message will use the available number of banks instead of the defined maximum. For a Nehalem based system it will print: "MCE: warning: using only 9 banks" while the correct message would be "MCE: warning: using only 6 banks" Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index f1f3f5e163b..8c8299ce7ad 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -445,9 +445,9 @@ static void mce_init(void *dummy) rdmsrl(MSR_IA32_MCG_CAP, cap); banks = cap & 0xff; if (banks > MCE_EXTENDED_BANK) { + banks = MCE_EXTENDED_BANK; printk(KERN_INFO "MCE: warning: using only %d banks\n", MCE_EXTENDED_BANK); - banks = MCE_EXTENDED_BANK; } /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) -- cgit v1.2.3 From fe94ae995d33a4df35b6b9cd0504e87d7e37c8de Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 14 Jun 2008 14:06:19 +0200 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/p4.c Before: total: 16 errors, 34 warnings, 257 lines checked After: total: 0 errors, 2 warnings, 257 lines checked No changes in the compiled code: paolo@paolo-desktop:~/linux.trees.git$ size /tmp/p4* text data bss dec hex filename 2644 4 4 2652 a5c /tmp/p4.o.after 2644 4 4 2652 a5c /tmp/p4.o.before paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/p4* 13f1b21c4246b31a28aaff38184586ca /tmp/p4.o.after 13f1b21c4246b31a28aaff38184586ca /tmp/p4.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/p4.c | 90 ++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index cb03345554a..eef001ad3bd 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -32,12 +32,12 @@ struct intel_mce_extended_msrs { /* u32 *reserved[]; */ }; -static int mce_num_extended_msrs = 0; +static int mce_num_extended_msrs; #ifdef CONFIG_X86_MCE_P4THERMAL static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ +{ printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", smp_processor_id()); add_taint(TAINT_MACHINE_CHECK); @@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) * be some SMM goo which handles it, so we can't even put a handler * since it might be delivered via SMI already -zwanem. */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); if ((l & (1<<3)) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", @@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) return; /* -EBUSY */ } - /* check whether a vector already exists, temporarily masked? */ + /* check whether a vector already exists, temporarily masked? 
*/ if (h & APIC_VECTOR_MASK) { printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " "installed\n", @@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ apic_write_around(APIC_LVTTHMR, h); - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); /* ok we're good to go... */ vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read(APIC_LVTTHMR); + apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); @@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) { u32 h; - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); + rdmsr(MSR_IA32_MCG_EAX, r->eax, h); + rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr(MSR_IA32_MCG_EDX, r->edx, h); + rdmsr(MSR_IA32_MCG_ESI, r->esi, h); + rdmsr(MSR_IA32_MCG_EDI, r->edi, h); + rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr(MSR_IA32_MCG_ESP, r->esp, h); + rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr(MSR_IA32_MCG_EIP, r->eip, h); } -static void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) } if (recover & 2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover & 1) - panic ("Unable to continue"); + panic("Unable to continue"); printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not * recoverable/continuable.This will allow BIOS to look at the MSRs * for errors if the OS could not log the error. 
*/ - for (i=0; i> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" " available\n", smp_processor_id(), mce_num_extended_msrs); -- cgit v1.2.3 From 5175676a2d012ca5e5ad5eaedbfc1da5d5660d2a Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 14 Jun 2008 14:37:14 +0200 Subject: x86: coding style fixes to arch/x86/kernel/cpu/mcheck/k7.c Before: total: 6 errors, 13 warnings, 105 lines checked After: total: 0 errors, 0 warnings, 105 lines checked paolo@paolo-desktop:~/linux.trees.git$ size /tmp/k7* text data bss dec hex filename 1135 0 0 1135 46f /tmp/k7.o.after 1135 0 0 1135 46f /tmp/k7.o.before paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/k7* 87b14954045aa37dbaee6fb7e022ed9a /tmp/k7.o.after 87b14954045aa37dbaee6fb7e022ed9a /tmp/k7.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/k7.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index e633c9c2b76..f390c9f6635 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -9,23 +9,23 @@ #include #include -#include +#include #include #include #include "mce.h" /* Machine Check Handler For AMD Athlon/Duron */ -static void k7_machine_check(struct pt_regs * regs, long error_code) +static void k7_machine_check(struct pt_regs *regs, long error_code) { - int recover=1; + int recover = 1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; + recover = 0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); @@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code) } if (recover&2) - panic ("CPU context corrupt"); + panic("CPU context corrupt"); if (recover&1) - panic ("Unable to continue"); - printk (KERN_EMERG "Attempting to continue.\n"); + panic("Unable to continue"); + printk(KERN_EMERG "Attempting to continue.\n"); mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); + wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); } @@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) machine_check_vector = k7_machine_check; wmb(); - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); + printk(KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr(MSR_IA32_MCG_CAP, l, h); if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; /* Clear status for MC index 0 separately, we don't touch CTL, * as some K7 Athlons cause spurious MCEs when its enabled. 
*/ if (boot_cpu_data.x86 == 6) { - wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); + wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); i = 1; } else i = 0; - for (; i Date: Sat, 14 Jun 2008 21:11:39 +0200 Subject: x86: coding style fixes to x86/kernel/cpu/cpufreq/cpufreq-nforce2.c Before: total: 22 errors, 8 warnings, 440 lines checked After: total: 0 errors, 8 warnings, 442 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/cpufreq-nforce2.o.* 3d4330a5d188fe904446e5948a618b48 /tmp/cpufreq-nforce2.o.after 1477e6b0dcd6f59b1fb6b4490042eca6 /tmp/cpufreq-nforce2.o.before ^^^ I guess this is because I fixed a few "do not initialise statics to 0 or NULL" paolo@paolo-desktop:~/linux.trees.git$ size /tmp/cpufreq-nforce2.o.* text data bss dec hex filename 1923 72 16 2011 7db /tmp/cpufreq-nforce2.o.after 1923 72 16 2011 7db /tmp/cpufreq-nforce2.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 44 ++++++++++++++------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index f03e9153618..965ea52767a 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -26,9 +26,10 @@ #define NFORCE2_SAFE_DISTANCE 50 /* Delay in ms between FSB changes */ -//#define NFORCE2_DELAY 10 +/* #define NFORCE2_DELAY 10 */ -/* nforce2_chipset: +/* + * nforce2_chipset: * FSB is changed using the chipset */ static struct pci_dev *nforce2_chipset_dev; @@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev; /* fid: * multiplier * 10 */ -static int fid = 0; +static int fid; /* min_fsb, max_fsb: * minimum and maximum FSB (= FSB at boot time) */ -static int min_fsb = 0; -static int max_fsb = 0; +static int min_fsb; +static int max_fsb; MODULE_AUTHOR("Sebastian Witt "); MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); @@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444); MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); + "Minimum FSB to use, if not defined: current FSB - 50"); #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) @@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb) /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); if (!nforce2_sub5) return 0; @@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) fsb /= 1000000; /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); - if(bootfsb || !temp) + if (bootfsb || !temp) return fsb; - + /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; @@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? 
Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb) tfsb--; /* Calculate the PLL reg. value */ - if ((pll = nforce2_calc_pll(tfsb)) == -1) + pll = nforce2_calc_pll(tfsb); + if (pll == -1) return -EINVAL; nforce2_write_pll(pll); @@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu) static int nforce2_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { -// unsigned long flags; +/* unsigned long flags; */ struct cpufreq_freqs freqs; unsigned int target_fsb; @@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); /* Disable IRQs */ - //local_irq_save(flags); + /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", - target_fsb); + target_fsb); else dprintk("Changed FSB successfully to %d\n", - target_fsb); + target_fsb); /* Enable IRQs */ - //local_irq_restore(flags); + /* local_irq_restore(flags); */ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); @@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy) policy->max = (fsb_pol_max + 1) * fid * 100; cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); return 0; } @@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); - if(!max_fsb) + if (!max_fsb) return -EIO; if (!min_fsb) -- cgit v1.2.3 From 8691e5a8f691cc2a4fda0651e8d307aaba0e7d68 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jun 2008 11:18:06 +0200 Subject: smp_call_function: get rid of the unused nonatomic/retry argument It's never used and the comments refer to nonatomic and retry interchangably. So get rid of it. Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d932..290652cefdd 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -222,7 +222,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, atomic_set(&data.gate,0); /* Start the ball rolling on other CPUs */ - if (smp_call_function(ipi_handler, &data, 1, 0) != 0) + if (smp_call_function(ipi_handler, &data, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); local_irq_save(flags); @@ -822,7 +822,7 @@ void mtrr_ap_init(void) */ void mtrr_save_state(void) { - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); } static int __init mtrr_init_finialize(void) -- cgit v1.2.3 From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. 
McKenney Signed-off-by: Jens Axboe --- arch/x86/kernel/cpu/mcheck/mce_64.c | 6 +++--- arch/x86/kernel/cpu/mcheck/non-fatal.c | 2 +- arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae..43b7cb59491 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -363,7 +363,7 @@ static void mcheck_check_cpu(void *info) static void mcheck_timer(struct work_struct *work) { - on_each_cpu(mcheck_check_cpu, NULL, 1, 1); + on_each_cpu(mcheck_check_cpu, NULL, 1); /* * Alert userspace if needed. If we logged an MCE, reduce the @@ -612,7 +612,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, * Collect entries that were still getting written before the * synchronize. */ - on_each_cpu(collect_tscs, cpu_tsc, 1, 1); + on_each_cpu(collect_tscs, cpu_tsc, 1); for (i = next; i < MCE_LOG_LEN; i++) { if (mcelog.entry[i].finished && mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { @@ -737,7 +737,7 @@ static void mce_restart(void) if (next_interval) cancel_delayed_work(&mcheck_work); /* Timer race is harmless here */ - on_each_cpu(mce_init, NULL, 1, 1); + on_each_cpu(mce_init, NULL, 1); next_interval = check_interval * HZ; if (next_interval) schedule_delayed_work(&mcheck_work, diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 00ccb6c14ec..cc1fccdd31e 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); static void mce_work_fn(struct work_struct *work) { - on_each_cpu(mce_checkregs, NULL, 1, 1); + on_each_cpu(mce_checkregs, NULL, 1); schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f9ae93adffe..58043f06d7e 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -180,7 +180,7 @@ void disable_lapic_nmi_watchdog(void) if (atomic_read(&nmi_active) <= 0) return; - on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); wd_ops->unreserve(); BUG_ON(atomic_read(&nmi_active) != 0); @@ -202,7 +202,7 @@ void enable_lapic_nmi_watchdog(void) return; } - on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); touch_nmi_watchdog(); } -- cgit v1.2.3 From 38c4c97c62a30aef276663c1128a2051a25ead7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 May 2008 19:17:02 +0200 Subject: x86-mce: BKL pushdown Signed-off-by: Arnd Bergmann --- arch/x86/kernel/cpu/mcheck/mce_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae..4ef151633e8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -527,10 +528,12 @@ static int open_exclu; /* already open exclusive? 
*/ static int mce_open(struct inode *inode, struct file *file) { + lock_kernel(); spin_lock(&mce_state_lock); if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { spin_unlock(&mce_state_lock); + unlock_kernel(); return -EBUSY; } @@ -539,6 +542,7 @@ static int mce_open(struct inode *inode, struct file *file) open_count++; spin_unlock(&mce_state_lock); + unlock_kernel(); return nonseekable_open(inode, file); } -- cgit v1.2.3 From 2d144e63098be47c21ad59d68a4fd17bd73a3aaf Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Tue, 24 Jun 2008 17:12:56 -0700 Subject: x86, mce_64.c: mce_cpu_quirks being ignored Quirks getting ignored was a bug. Below patch fixes the bug, until we have the dynamic banks support. Sysfs choice configuration should not have any issues with the earlier patch as we look for NR_SYSFS_BANKS in do_machine_check(). Signed-off-by: Venkatesh Pallipadi Cc: Andi Kleen Cc: Max Asbock Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 8c8299ce7ad..501ca1cea27 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -463,7 +463,11 @@ static void mce_init(void *dummy) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + if (i < NR_SYSFS_BANKS) + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + else + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } -- cgit v1.2.3 From aa276e1cafb3ce9d01d1e837bcd67e92616013ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jun 2008 19:15:00 +0200 Subject: x86, clockevents: add C1E aware idle function C1E on AMD machines is like C3 but without control from the OS. Up to now we disabled the local apic timer for those machines as it stops when the CPU goes into C1E. This excludes those machines from high resolution timers / dynamic ticks, which hurts especially X2 based laptops. The current boot time C1E detection has another, more serious flaw as well: some BIOSes do not enable C1E until the ACPI processor module is loaded. This causes systems to stop working after that point. To work nicely with C1E enabled machines we use a separate idle function, which checks on idle entry whether C1E was enabled in the Interrupt Pending Message MSR. This allows us to do timer broadcasting for C1E and covers the late enablement of C1E as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 30 ------------------------------ arch/x86/kernel/cpu/amd_64.c | 25 ------------------------- 2 files changed, 55 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e76b49e7a91..acc891ae590 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,31 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -#ifdef CONFIG_X86_LOCAL_APIC - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. 
*/ -static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - if (c->x86 < 0x0F) - return 0; - - /* Family 0x0f models < rev F do not have this MSR */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - "Force timer broadcast.\n"); - return 1; - } - return 0; -} -#endif - int force_mwait __cpuinitdata; static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) @@ -285,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken(c)) - local_apic_timer_disabled = 1; -#endif - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index f5fc161d8f2..f8d20588bde 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -110,28 +110,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - if (c->x86 < 0x0F) - return 0; - - /* Family 0x0f models < rev F do not have this MSR */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - "Force timer broadcast.\n"); - return 1; - } - return 0; -} - void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); @@ -212,9 +190,6 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) amd_enable_pci_ext_cfg(c); - if (amd_apic_timer_broken(c)) - disable_apic_timer = 1; - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { unsigned long long tseg; -- cgit v1.2.3 From 3a27dd1ce5de08e21e0266ddf00e6f1f843bfe8b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 12 Jun 2008 20:19:23 +0200 Subject: x86: Move PCI IO ECS code to x86/pci "Form follows function". Code is now where it belongs to. 
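For context, the amd_enable_pci_ext_cfg() calls dropped from the cpu code in the diff below do one small thing: they set the EnableCf8ExtCfg bit in the AMD northbridge configuration MSR, which makes extended PCI configuration space reachable through the legacy CF8/CFC I/O ports. A minimal sketch of that operation, assuming the Family 10h NB_CFG layout (the helper name and the local define here are illustrative, not a quote of the relocated code):

#define ENABLE_CF8_EXT_CFG	(1ULL << 46)	/* EnableCf8ExtCfg in NB_CFG */

static void __cpuinit amd_enable_pci_io_ecs(void)
{
	u64 reg;

	/* set the enable bit only if firmware has not done so already */
	rdmsrl(MSR_AMD64_NB_CFG, reg);
	if (!(reg & ENABLE_CF8_EXT_CFG)) {
		reg |= ENABLE_CF8_EXT_CFG;
		wrmsrl(MSR_AMD64_NB_CFG, reg);
	}
}

Running this from the PCI init path instead of the per-vendor CPU setup keeps bus-specific knowledge out of arch/x86/kernel/cpu/.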
Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 3 --- arch/x86/kernel/cpu/amd_64.c | 4 ---- arch/x86/kernel/cpu/cpu.h | 2 -- 3 files changed, 9 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index acc891ae590..81a07ca65d4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -266,9 +266,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - - if (c->x86 == 0x10) - amd_enable_pci_ext_cfg(c); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index f8d20588bde..250bfe6064a 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -6,7 +6,6 @@ #include #include -#include "cpu.h" extern int __cpuinit get_model_name(struct cpuinfo_x86 *c); extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c); @@ -187,9 +186,6 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) fam10h_check_enable_mmcfg(); - if (c->x86 == 0x10) - amd_enable_pci_ext_cfg(c); - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { unsigned long long tseg; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f5d5bb1b554..40ad1893fe8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -39,5 +39,3 @@ extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif /* CONFIG_X86_32 */ - -extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); -- cgit v1.2.3 From dcd32b6a1ffe6c040f8346f7fbaf4318bb8ae41c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Jun 2008 08:18:09 +0200 Subject: x86: make 64-bit identify_cpu use cpu_dev we may need to move some functions to common.c later Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 17 +++++++++++++---- arch/x86/kernel/cpu/centaur_64.c | 16 ++++++++++++++-- arch/x86/kernel/cpu/cpu.h | 6 ++++-- arch/x86/kernel/cpu/intel_64.c | 15 +++++++++++++-- 4 files changed, 44 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 250bfe6064a..30b7557c964 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -7,8 +7,7 @@ #include -extern int __cpuinit get_model_name(struct cpuinfo_x86 *c); -extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c); +#include "cpu.h" int force_mwait __cpuinitdata; @@ -109,7 +108,7 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } -void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); @@ -118,7 +117,7 @@ void __cpuinit early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } -void __cpuinit init_amd(struct cpuinfo_x86 *c) +static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -200,3 +199,13 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) set_memory_4k((unsigned long)__va(tseg), 1); } } + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_early_init = early_init_amd, + .c_init = init_amd, +}; + +cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); + diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 
bac96d187d0..13526fd5cce 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -4,13 +4,15 @@ #include #include -void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +#include "cpu.h" + +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) { if (c->x86 == 0x6 && c->x86_model >= 0xf) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } -void __cpuinit init_centaur(struct cpuinfo_x86 *c) +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned n; @@ -29,3 +31,13 @@ void __cpuinit init_centaur(struct cpuinfo_x86 *c) } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); } + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, + .c_init = init_centaur, +}; + +cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); + diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 40ad1893fe8..4d894e8565f 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,4 +1,6 @@ -#ifdef CONFIG_X86_32 +#ifndef ARCH_X86_CPU_H + +#define ARCH_X86_CPU_H struct cpu_model_info { int vendor; @@ -38,4 +40,4 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); -#endif /* CONFIG_X86_32 */ +#endif diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index b3391219948..fcb1cc9d75c 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -5,7 +5,9 @@ #include #include -void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +#include "cpu.h" + +static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) @@ -48,7 +50,7 @@ static void __cpuinit srat_detect_node(void) #endif } -void __cpuinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned n; @@ -90,3 +92,12 @@ void __cpuinit init_intel(struct cpuinfo_x86 *c) srat_detect_node(); } + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_early_init = early_init_intel, + .c_init = init_intel, +}; +cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); + -- cgit v1.2.3 From d0be6bdea103b8d04c8a3495538b7c0011ae4129 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Jun 2008 18:58:51 -0700 Subject: x86: rename two e820 related functions rename update_memory_range to e820_update_range rename add_memory_region to e820_add_region to make it more clear that they are about e820 map operations. 
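Since both helpers keep their argument lists, a call site changes in name only. An illustrative before/after, following the pattern visible in the mtrr diff below:

	/* before the rename */
	update_memory_range(trim_start, trim_size, E820_RAM, E820_RESERVED);

	/* after the rename */
	e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);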
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 0642201784e..105afe12beb 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1440,7 +1440,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn, trim_size <<= PAGE_SHIFT; trim_size -= trim_start; - return update_memory_range(trim_start, trim_size, E820_RAM, + return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); } /** -- cgit v1.2.3 From ce38cc79964687d3c7e92663bc040552416fca27 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Jun 2008 01:14:27 -0700 Subject: x86: clean up init_amd() 1. move out calling of check_enable_amd_mmconf_dmi out of setup_64.c put it into init_amd(), so don't need to make extra dmi check for system with other cpus. 2. 15 --> 0xf Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 30b7557c964..958526d6a74 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -131,7 +131,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K8_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K8_HWCR, value); @@ -143,10 +143,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) clear_cpu_cap(c, 0*32+31); /* On C+ stepping K8 rep microcode works well for copy/memset */ - level = cpuid_eax(1); - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || - level >= 0x0f58)) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); + if (c->x86 == 0xf) { + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); @@ -157,7 +158,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = get_model_name(c); if (!level) { switch (c->x86) { - case 15: + case 0xf: /* Should distinguish Models here, but this is only a fallback anyways. 
*/ strcpy(c->x86_model_id, "Hammer"); @@ -176,14 +177,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) else num_cache_leaves = 3; - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) + if (c->x86 >= 0xf && c->x86 <= 0x11) set_cpu_cap(c, X86_FEATURE_K8); /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - if (c->x86 == 0x10) + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + fam10h_check_enable_mmcfg(); + } if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { unsigned long long tseg; -- cgit v1.2.3 From f580366f77cc4e035a68369105fbeae5bf436b4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Jun 2008 03:24:19 -0700 Subject: x86: seperate funcs from setup_64 to cpu common_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common_64.c | 406 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 407 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/common_64.c (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 65b1be5fe9c..ee76eaad300 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -6,7 +6,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o +obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c new file mode 100644 index 00000000000..b6f205063f7 --- /dev/null +++ b/arch/x86/kernel/cpu/common_64.c @@ -0,0 +1,406 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + +#include "cpu.h" + +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. 
*/ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ + display_cacheinfo(c); +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + + if (c->extended_cpuid_level < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + return 1; +} + + +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, eax, ebx, ecx, edx; + + n = c->extended_cpuid_level; + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " + "D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; + } + + if (n >= 0x80000006) { + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + ecx = cpuid_ecx(0x80000006); + c->x86_cache_size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + c->x86_cache_size, ecx & 0xFF); + } + if (n >= 0x80000008) { + cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +} + +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(index_msb); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + } +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } + +#endif +} + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void 
__init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); + +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + c->extended_cpuid_level = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, + (unsigned int *)&c->x86_vendor_id[0], + (unsigned int *)&c->x86_vendor_id[8], + (unsigned int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + __u32 misc; + cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if (xlvl >= 0x80000004) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + c->extended_cpuid_level = cpuid_eax(0x80000000); + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); + + /* early_param could clear that, but recall get it set again */ + if (disable_apic) + clear_cpu_cap(c, X86_FEATURE_APIC); +} + +/* + * This does the hard work of actually picking apart the CPU stuff... 
+ */ +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + early_identify_cpu(c); + + init_scattered_cpuid_features(c); + + c->apicid = phys_pkg_id(0); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + detect_ht(c); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if (c != &boot_cpu_data) { + /* AND the already accumulated flags with these */ + for (i = 0; i < NCAPINTS; i++) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Clear all flags overriden by options */ + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] &= ~cleared_cpu_caps[i]; + +#ifdef CONFIG_X86_MCE + mcheck_init(c); +#endif + select_idle_routine(c); + +#ifdef CONFIG_NUMA + numa_add_cpu(smp_processor_id()); +#endif + +} + +void __cpuinit identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + mtrr_ap_init(); +} + +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(KERN_CONT " stepping %02x\n", c->x86_mask); + else + printk(KERN_CONT "\n"); +} + +static __init int setup_disablecpuid(char *arg) +{ + int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; + return 1; +} +__setup("clearcpuid=", setup_disablecpuid); -- cgit v1.2.3 From 9a250347591da3e60b5ee53dd1d341732f081117 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Jun 2008 03:24:00 -0700 Subject: x86: change identify_cpu to static Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a94624..80ab20d4fa3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b6f205063f7..48ba7996158 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -312,7 +312,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) /* * This does the hard work of actually picking apart the CPU stuff... 
*/ -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; -- cgit v1.2.3 From 0f0124fa742da7c51e2e3c5ded7f5e5e06ddc195 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Jun 2008 16:25:37 -0700 Subject: x86: merge setup64.c into common_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 277 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 276 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 48ba7996158..9fb5b7caaa8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -1,10 +1,17 @@ #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include #include #include #include @@ -19,6 +26,15 @@ #include #include #endif +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "cpu.h" @@ -404,3 +420,262 @@ static __init int setup_disablecpuid(char *arg) return 1; } __setup("clearcpuid=", setup_disablecpuid); + +#ifndef CONFIG_DEBUG_BOOT_PARAMS +struct boot_params __initdata boot_params; +#else +struct boot_params boot_params; +#endif + +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. 
+To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + } else { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", cpu); + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } + + pda->irqstackptr += IRQSTACKSIZE-64; +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] +__attribute__((section(".bss.page_aligned"))); + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init. 
+ */ +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + + /* + * set up and load the per-CPU TSS + */ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception stack %ld %d\n", + v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. 
*/ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} -- cgit v1.2.3 From 7f0be02c5ed1deb04c54c6a17f412e04f417df11 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Jun 2008 17:37:54 -0700 Subject: x86: move boot_params declaring to setup.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 9fb5b7caaa8..39eaefbcec0 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -421,12 +421,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -#ifndef CONFIG_DEBUG_BOOT_PARAMS -struct boot_params __initdata boot_params; -#else -struct boot_params boot_params; -#endif - cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda **_cpu_pda __read_mostly; -- cgit v1.2.3 From 47a486cc110fe77518c79a566b50a5c785c813ae Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 24 Jun 2008 22:52:03 +0200 Subject: x86: perfctr-watchdog.c - coding style cleanup Just some code beautification. Nothing else. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 202 ++++++++++++++++++--------------- 1 file changed, 112 insertions(+), 90 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index ddda4b64f54..2e9bef6e3aa 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -1,11 +1,15 @@ -/* local apic based NMI watchdog for various CPUs. - This file also handles reservation of performance counters for coordination - with other users (like oprofile). - - Note that these events normally don't tick when the CPU idles. This means - the frequency varies with CPU load. - - Original code for K7/P6 written by Keith Owens */ +/* + * local apic based NMI watchdog for various CPUs. + * + * This file also handles reservation of performance counters for coordination + * with other users (like oprofile). + * + * Note that these events normally don't tick when the CPU idles. This means + * the frequency varies with CPU load. + * + * Original code for K7/P6 written by Keith Owens + * + */ #include #include @@ -36,12 +40,16 @@ struct wd_ops { static const struct wd_ops *wd_ops; -/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's - * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) +/* + * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. 
+ * + * It will be the max for all platforms (for now) */ #define NMI_MAX_COUNTER_BITS 66 -/* perfctr_nmi_owner tracks the ownership of the perfctr registers: +/* + * perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for * different subsystems this reservation system just tries to coordinate @@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) return 0; } -/* converts an msr to an appropriate reservation bit */ -/* returns the bit offset of the event selection register */ +/* + * converts an msr to an appropriate reservation bit + * returns the bit offset of the event selection register + */ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) { /* returns the bit offset of the event selection register */ @@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) return (!test_bit(counter, perfctr_nmi_owner)); } +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); int reserve_perfctr_nmi(unsigned int msr) { @@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr) return 1; return 0; } +EXPORT_SYMBOL(reserve_perfctr_nmi); void release_perfctr_nmi(unsigned int msr) { @@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr) clear_bit(counter, perfctr_nmi_owner); } +EXPORT_SYMBOL(release_perfctr_nmi); int reserve_evntsel_nmi(unsigned int msr) { @@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr) return 1; return 0; } +EXPORT_SYMBOL(reserve_evntsel_nmi); void release_evntsel_nmi(unsigned int msr) { @@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } - -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); -EXPORT_SYMBOL(reserve_perfctr_nmi); -EXPORT_SYMBOL(release_perfctr_nmi); -EXPORT_SYMBOL(reserve_evntsel_nmi); EXPORT_SYMBOL(release_evntsel_nmi); void disable_lapic_nmi_watchdog(void) @@ -234,8 +243,8 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) return retval; } -static void -write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) +static void write_watchdog_counter(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) { u64 count = (u64)cpu_khz * 1000; @@ -246,7 +255,7 @@ write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi } static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) + const char *descr, unsigned nmi_hz) { u64 count = (u64)cpu_khz * 1000; @@ -256,9 +265,10 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, wrmsr(perfctr_msr, (u32)(-count), 0); } -/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface - nicely stable so there is not much variety */ - +/* + * AMD K7/K8/Family10h/Family11h support. 
+ * AMD keeps this interface nicely stable so there is not much variety + */ #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) #define K7_EVNTSEL_OS (1 << 17) @@ -291,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ return 1; } @@ -327,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) } static const struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<47, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL << 47, }; -/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ - +/* + * Intel Model 6 (PPro+,P2,P3,P-M,Core1) + */ #define P6_EVNTSEL0_ENABLE (1 << 22) #define P6_EVNTSEL_INT (1 << 20) #define P6_EVNTSEL_OS (1 << 17) @@ -374,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ return 1; } static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { - /* P6 based Pentium M need to re-unmask + /* + * P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt * other P6 variant. - * ArchPerfom/Core Duo also needs this */ + * ArchPerfom/Core Duo also needs this + */ apic_write(APIC_LVTPC, APIC_DM_NMI); + /* P6/ARCH_PERFMON has 32 bit counter write */ write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); } static const struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL<<39, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL << 39, }; -/* Intel P4 performance counters. By far the most complicated of all. */ - -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -#define P4_CCCR_OVF (1<<31) - -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. [IA32-Vol3, Section 14.9.9] */ +/* + * Intel P4 performance counters. + * By far the most complicated of all. 
+ */ +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) +#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) +#define P4_ESCR_OS (1 << 3) +#define P4_ESCR_USR (1 << 2) +#define P4_CCCR_OVF_PMI0 (1 << 26) +#define P4_CCCR_OVF_PMI1 (1 << 27) +#define P4_CCCR_THRESHOLD(N) ((N) << 20) +#define P4_CCCR_COMPLEMENT (1 << 19) +#define P4_CCCR_COMPARE (1 << 18) +#define P4_CCCR_REQUIRED (3 << 16) +#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) +#define P4_CCCR_ENABLE (1 << 12) +#define P4_CCCR_OVF (1 << 31) +/* + * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + * CRU_ESCR0 (with any non-null event selector) through a complemented + * max threshold. [IA32-Vol3, Section 14.9.9] + */ static int setup_p4_watchdog(unsigned nmi_hz) { unsigned int perfctr_msr, evntsel_msr, cccr_msr; @@ -444,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz) #endif ht_num = 0; - /* performance counters are shared resources + /* + * performance counters are shared resources * assign each hyperthread its own set * (re-use the ESCR0 register, seems safe * and keeps the cccr_val the same) @@ -542,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) } static const struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL<<39, + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL << 39, }; -/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully - all future Intel CPUs. */ - +/* + * Watchdog using the Intel architected PerfMon. + * Used for Core2 and hopefully all future Intel CPUs. + */ #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK @@ -601,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused + wd->cccr_msr = 0; /* unused */ intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } static struct wd_ops intel_arch_wd_ops __read_mostly = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; static void probe_nmi_watchdog(void) @@ -626,8 +645,10 @@ static void probe_nmi_watchdog(void) wd_ops = &k7_wd_ops; break; case X86_VENDOR_INTEL: - /* Work around Core Duo (Yonah) errata AE49 where perfctr1 - doesn't have a working enable bit. */ + /* + * Work around Core Duo (Yonah) errata AE49 where perfctr1 + * doesn't have a working enable bit. 
+ */
 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; } @@ -638,7 +659,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86) { case 6: - if (boot_cpu_data.x86_model > 0xd) + if (boot_cpu_data.x86_model > 13) return;
 wd_ops = &p6_wd_ops; @@ -699,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; + rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) /* perfctr still running? */ + if (ctr & wd_ops->checkbit) /* perfctr still running? */ return 0; - } + wd_ops->rearm(wd, nmi_hz); return 1; } -- cgit v1.2.3 From ada857082317e6883cfcf7deb4e0c54d3c447cb0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Jun 2008 00:19:00 -0400 Subject: x86: remove open-coded save/load segment operations This removes a pile of buggy open-coded implementations of savesegment and loadsegment. (They are buggy because they don't have memory barriers to prevent them from being reordered with respect to memory accesses.) Signed-off-by: Jeremy Fitzhardinge Cc: xen-devel Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 39eaefbcec0..75185023529 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -480,7 +480,8 @@ void pda_init(int cpu) struct x8664_pda *pda = cpu_pda(cpu); /* Setup up data that may be needed in __get_free_pages early */ - asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + loadsegment(fs, 0); + loadsegment(gs, 0); /* Memory clobbers used to order PDA accessed */ mb(); wrmsrl(MSR_GS_BASE, pda); -- cgit v1.2.3 From f361a450bf1ad14e2b003217dbf3958638631265 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 10 Jul 2008 20:38:26 -0700 Subject: x86: introduce max_low_pfn_mapped for 64-bit When more than 4G of memory is installed, don't map the big hole below 4G. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 958526d6a74..bd182b7616e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -199,10 +199,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Don't do it for gbpages because there seems very little * benefit in doing so. */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < - (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) set_memory_4k((unsigned long)__va(tseg), 1); + } } } -- cgit v1.2.3 From 8d28aab59fe939be40efae870ced0b05caa259fb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 10 Jul 2008 16:22:56 -0700 Subject: x86_64: add pseudo-features for 32-bit compat syscall Add pseudo-feature bits to describe whether the CPU supports sysenter and/or syscall from ia32-compat userspace. This removes a hardcoded test in vdso32-setup.
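For illustration, a minimal sketch of the consumer side this enables, assuming the vdso32-setup test is converted to feature-bit checks; the helper names below are illustrative, not taken from this patch:

	/*
	 * Illustrative only: pick the 32-bit vsyscall flavour from the
	 * pseudo-feature bits instead of hardcoding vendor/family tests.
	 */
	#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
	#define vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32))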
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 +++ arch/x86/kernel/cpu/intel_64.c | 2 ++ 3 files changed, 7 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 13526fd5cce..2026d2119cd 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -10,6 +10,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) { if (c->x86 == 0x6 && c->x86_model >= 0xf) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSENTER32); } static void __cpuinit init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 75185023529..36537ab9e56 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -314,6 +314,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + /* Assume all 64-bit CPUs support 32-bit syscall */ + set_cpu_cap(c, X86_FEATURE_SYSCALL32); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index fcb1cc9d75c..02f773399e3 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSENTER32); } /* -- cgit v1.2.3 From 965194c15dc9e4f3bc44432b39c441c86af7f11d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 14:31:28 -0700 Subject: x86: max_low_pfn_mapped fix, #2 Tighten the boundary checks around max_low_pfn_mapped - don't overmap nor undermap into holes. Also print out tseg for AMD CPUs, for diagnostic purposes. (This is an SMM area, and we split up any big mappings around that area.) Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index bd182b7616e..7c36fb8a28d 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -200,6 +200,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || ((tseg>>PMD_SHIFT) < -- cgit v1.2.3 From 3d88cca7085cffce077f808f36551e9050eb9e3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 22:52:55 -0700 Subject: x86: fix numaq_tsc_disable calling Got this on a test system: calling numaq_tsc_disable+0x0/0x39 NUMAQ: disabling TSC initcall numaq_tsc_disable+0x0/0x39 returned 0 after 0 msecs That's because we should not be using arch_initcall to call numaq_tsc_disable. We need to call it in setup_arch(), before time_init()/tsc_init(), and call it in init_intel() to make the CPU feature bits right.
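For illustration, a rough sketch of the call ordering this implies; the setup_arch() body is elided and assumed, not taken from this patch:

	/*
	 * Illustrative only: an initcall runs long after the timer code has
	 * looked at the TSC feature bit, so the disable must happen in
	 * setup_arch(), before time_init()/tsc_init().
	 */
	void __init setup_arch(char **cmdline_p)
	{
		...
		numaq_tsc_disable();	/* prints "NUMAQ: disabling TSC" */
		...			/* later: time_init() -> tsc_init() */
	}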
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c51d3..70609efdf1d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ds_init_intel(c); + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -- cgit v1.2.3 From 87a1c441e1aeaf00f97e63dfc310ea7684ec9dda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Jul 2008 14:30:35 -0700 Subject: x86: get x86_phys_bits early When trying to make hpet_enable() use ioremap() instead of a fixmap, we got: ioremap: invalid physical address fed00000 ------------[ cut here ]------------ WARNING: at arch/x86/mm/ioremap.c:161 __ioremap_caller+0x8c/0x2f3() Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.26-rc9-tip-01873-ga9827e7-dirty #358 Call Trace: [] warn_on_slowpath+0x6c/0xa7 [] ? __slab_alloc+0x20a/0x3fb [] ? mpol_new+0x88/0x17d [] ? mcount_call+0x5/0x31 [] ? mcount_call+0x5/0x31 [] __ioremap_caller+0x8c/0x2f3 [] ? hpet_enable+0x39/0x241 [] ? mcount_call+0x5/0x31 [] ioremap_nocache+0x2a/0x40 [] hpet_enable+0x39/0x241 [] hpet_time_init+0x21/0x4e [] start_kernel+0x302/0x395 [] x86_64_start_reservations+0xb9/0xd4 [] ? x86_64_init_pda+0x39/0x4f [] x86_64_start_kernel+0xec/0x107 ---[ end trace a7919e7f17c0a725 ]--- It seems that on AMD systems x86_phys_bits is set later, so move the setting early, into early_identify_cpu(), and remove the same code from the Intel and Centaur paths. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 10 ---------- arch/x86/kernel/cpu/common_64.c | 14 ++++++++------ arch/x86/kernel/cpu/intel_64.c | 10 ---------- 3 files changed, 8 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 2026d2119cd..1d181c40e2e 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,16 +16,6 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { - /* Cache sizes */ - unsigned n; - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 36537ab9e56..7b8cc72feb4 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, eax, ebx, ecx, edx; + unsigned int n, dummy, ebx, ecx, edx; n = c->extended_cpuid_level; @@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", c->x86_cache_size, ecx & 0xFF); } - if (n >= 0x80000008) { - cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -314,6 +309,13 @@ static void __cpuinit
early_identify_cpu(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + /* Assume all 64-bit CPUs support 32-bit syscall */ set_cpu_cap(c, X86_FEATURE_SYSCALL32); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 02f773399e3..1019c58d39f 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,9 +54,6 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { - /* Cache sizes */ - unsigned n; - init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -78,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ds_init_intel(c); - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) -- cgit v1.2.3
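For context on the warning above, the ioremap() sanity check that fired compares the physical address against the CPU's reported physical address width, roughly as below; this is a reconstruction from the ioremap code of that era, an assumption rather than part of the patch:

	/*
	 * Illustrative only: physical addresses at or above
	 * 1 << x86_phys_bits are rejected, so x86_phys_bits must be set
	 * before the first ioremap() user (here: hpet_enable()) runs.
	 */
	static inline int phys_addr_valid(unsigned long addr)
	{
		return addr < (1UL << boot_cpu_data.x86_phys_bits);
	}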