From 849620fab413355eff48232eac5a8c53c57615c5 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 14 May 2009 17:10:52 +0200
Subject: Revert "oprofile: discover counters for op ppro too"

This reverts commit 59512900baab03c5629f2ff5efad1d5d4e682ece.
arch_perfmon_setup_counters() is actually never called for ppro, so
there is no code that changes the numbers in op_ppro_spec. The patch as
it is has no effect.

Cc: Andi Kleen
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_ppro.c | 8 +++-----
 arch/x86/oprofile/op_x86_model.h  | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fbdaad..2a123990a84 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -213,9 +213,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 }
 
-struct op_x86_model_spec op_ppro_spec = {
-	.num_counters = 2,	/* can be overriden */
-	.num_controls = 2,	/* dito */
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters = 2,
+	.num_controls = 2,
 	.fill_in_addresses = &ppro_fill_in_addresses,
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
@@ -251,8 +251,6 @@ void arch_perfmon_setup_counters(void)
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
-	op_ppro_spec.num_counters = num_counters;
-	op_ppro_spec.num_controls = num_counters;
 }
 
 struct op_x86_model_spec op_arch_perfmon_spec = {
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 825e79064d6..2317149c94f 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -45,7 +45,7 @@ struct op_x86_model_spec {
 	void (*shutdown)(struct op_msrs const * const msrs);
 };
 
-extern struct op_x86_model_spec op_ppro_spec;
+extern struct op_x86_model_spec const op_ppro_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_amd_spec;
--
cgit v1.2.3


From e419294ed3c98cccc145202e4fe165bfd8099d63 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Sun, 12 Oct 2008 15:12:34 -0400
Subject: x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init

The function arch_perfmon_init() in nmi_int.c is model specific. This
patch moves it to op_model_ppro.c by using the init function pointer in
struct op_x86_model_spec.
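
A minimal sketch of the resulting flow (the exact call-site shape in
op_nmi_init() is an assumption for illustration, not code from this
patch): the generic layer can run model-specific setup through the hook
alone, without knowing about arch perfmon:

	/* sketch only: generic init path, model-agnostic thanks to .init */
	if (model->init) {
		int ret = model->init(ops);	/* e.g. arch_perfmon_init() */
		if (ret)
			return ret;
	}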
Cc: Andi Kleen
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c       | 21 +++++++++------------
 arch/x86/oprofile/op_model_ppro.c |  9 ++++++++-
 arch/x86/oprofile/op_x86_model.h  |  2 --
 3 files changed, 17 insertions(+), 15 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3b285e656e2..dd8515301fb 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -427,7 +427,7 @@ static int __init ppro_init(char **cpu_type)
 		*cpu_type = "i386/core_2";
 		break;
 	case 26:
-		arch_perfmon_setup_counters();
+		model = &op_arch_perfmon_spec;
 		*cpu_type = "i386/core_i7";
 		break;
 	case 28:
@@ -442,16 +442,6 @@ static int __init ppro_init(char **cpu_type)
 	return 1;
 }
 
-static int __init arch_perfmon_init(char **cpu_type)
-{
-	if (!cpu_has_arch_perfmon)
-		return 0;
-	*cpu_type = "i386/arch_perfmon";
-	model = &op_arch_perfmon_spec;
-	arch_perfmon_setup_counters();
-	return 1;
-}
-
 /* in order to get sysfs right */
 static int using_nmi;
 
@@ -509,8 +499,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 			break;
 		}
 
-		if (!cpu_type && !arch_perfmon_init(&cpu_type))
+		if (cpu_type)
+			break;
+
+		if (!cpu_has_arch_perfmon)
 			return -ENODEV;
+
+		/* use arch perfmon as fallback */
+		cpu_type = "i386/arch_perfmon";
+		model = &op_arch_perfmon_spec;
 		break;
 
 	default:
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2a123990a84..ae581196688 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -233,7 +233,7 @@ struct op_x86_model_spec const op_ppro_spec = {
  * the specific CPU.
  */
 
-void arch_perfmon_setup_counters(void)
+static void arch_perfmon_setup_counters(void)
 {
 	union cpuid10_eax eax;
 
@@ -253,7 +253,14 @@ void arch_perfmon_setup_counters(void)
 	op_arch_perfmon_spec.num_controls = num_counters;
 }
 
+static int arch_perfmon_init(struct oprofile_operations *ignore)
+{
+	arch_perfmon_setup_counters();
+	return 0;
+}
+
 struct op_x86_model_spec op_arch_perfmon_spec = {
+	.init = &arch_perfmon_init,
 	/* num_counters/num_controls filled in at runtime */
 	.fill_in_addresses = &ppro_fill_in_addresses,
 	/* user space does the cpuid check for available events */
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 2317149c94f..ed27783bb0d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -51,6 +51,4 @@ extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_amd_spec;
 extern struct op_x86_model_spec op_arch_perfmon_spec;
 
-extern void arch_perfmon_setup_counters(void);
-
 #endif /* OP_X86_MODEL_H */
--
cgit v1.2.3


From 06552ccc36abeb12e37efc16c384dc7f30794f85 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 28 May 2009 02:12:36 +0200
Subject: x86/oprofile: minor style changes in struct op_x86_model_spec

Some vertical alignment fixes. Variables are now located at the
beginning of the struct.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_x86_model.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index ed27783bb0d..bd8157d12ff 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -32,17 +32,17 @@ struct pt_regs;
  * various x86 CPU models' perfctr support.
  */
 struct op_x86_model_spec {
-	int (*init)(struct oprofile_operations *ops);
-	void (*exit)(void);
-	unsigned int num_counters;
-	unsigned int num_controls;
-	void (*fill_in_addresses)(struct op_msrs * const msrs);
-	void (*setup_ctrs)(struct op_msrs const * const msrs);
-	int (*check_ctrs)(struct pt_regs * const regs,
-		struct op_msrs const * const msrs);
-	void (*start)(struct op_msrs const * const msrs);
-	void (*stop)(struct op_msrs const * const msrs);
-	void (*shutdown)(struct op_msrs const * const msrs);
+	unsigned int	num_counters;
+	unsigned int	num_controls;
+	int		(*init)(struct oprofile_operations *ops);
+	void		(*exit)(void);
+	void		(*fill_in_addresses)(struct op_msrs * const msrs);
+	void		(*setup_ctrs)(struct op_msrs const * const msrs);
+	int		(*check_ctrs)(struct pt_regs * const regs,
+				struct op_msrs const * const msrs);
+	void		(*start)(struct op_msrs const * const msrs);
+	void		(*stop)(struct op_msrs const * const msrs);
+	void		(*shutdown)(struct op_msrs const * const msrs);
 };
 
 extern struct op_x86_model_spec const op_ppro_spec;
--
cgit v1.2.3


From 9063759540daac40cc1f402f83a3be6b489f8583 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Tue, 10 Mar 2009 19:15:57 +0100
Subject: x86/oprofile: remove #ifdefs in ibs functions

IBS code is moved to separate functions. This allows the removal of
#ifdefs inside functions.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 80 +++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 34 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8fdf06e4edf..b54c0880b7d 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -220,6 +220,50 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	return 1;
 }
 
+static inline void op_amd_start_ibs(void)
+{
+	unsigned int low, high;
+	if (has_ibs && ibs_config.fetch_enabled) {
+		low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+		high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
+			+ IBS_FETCH_HIGH_ENABLE;
+		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+	}
+
+	if (has_ibs && ibs_config.op_enabled) {
+		low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
+			+ ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
+			+ IBS_OP_LOW_ENABLE;
+		high = 0;
+		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+	}
+}
+
+static void op_amd_stop_ibs(void)
+{
+	unsigned int low, high;
+	if (has_ibs && ibs_config.fetch_enabled) {
+		/* clear max count and enable */
+		low = 0;
+		high = 0;
+		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+	}
+
+	if (has_ibs && ibs_config.op_enabled) {
+		/* clear max count and enable */
+		low = 0;
+		high = 0;
+		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+	}
+}
+
+#else
+
+static inline int op_amd_handle_ibs(struct pt_regs * const regs,
+				    struct op_msrs const * const msrs) { }
+static inline void op_amd_start_ibs(void) { }
+static inline void op_amd_stop_ibs(void) { }
+
 #endif
 
 static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -238,9 +282,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
 		}
 	}
 
-#ifdef CONFIG_OPROFILE_IBS
 	op_amd_handle_ibs(regs, msrs);
-#endif
 
 	/* See op_model_ppro.c */
 	return 1;
@@ -258,25 +300,9 @@ static void op_amd_start(struct op_msrs const * const msrs)
 		}
 	}
 
-#ifdef CONFIG_OPROFILE_IBS
-	if (has_ibs && ibs_config.fetch_enabled) {
-		low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
-		high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
-			+ IBS_FETCH_HIGH_ENABLE;
-		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
-	}
-
-	if (has_ibs && ibs_config.op_enabled) {
-		low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
-			+ ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
-			+ IBS_OP_LOW_ENABLE;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
-	}
-#endif
+	op_amd_start_ibs();
 }
 
-
 static void op_amd_stop(struct op_msrs const * const msrs)
 {
 	unsigned int low, high;
@@ -294,21 +320,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 		CTRL_WRITE(low, high, msrs, i);
 	}
 
-#ifdef CONFIG_OPROFILE_IBS
-	if (has_ibs && ibs_config.fetch_enabled) {
-		/* clear max count and enable */
-		low = 0;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
-	}
-
-	if (has_ibs && ibs_config.op_enabled) {
-		/* clear max count and enable */
-		low = 0;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
-	}
-#endif
+	op_amd_stop_ibs();
 }
 
 static void op_amd_shutdown(struct op_msrs const * const msrs)
--
cgit v1.2.3


From d20f24c66011f8a397bca6c5d1a6a7c7e612d2d7 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Sun, 11 Jan 2009 13:01:16 +0100
Subject: x86/oprofile: simplify AMD cpu init code

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index dd8515301fb..ae0ab03959b 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,27 +460,26 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
 
 		switch (family) {
-		default:
-			return -ENODEV;
 		case 6:
-			model = &op_amd_spec;
 			cpu_type = "i386/athlon";
 			break;
 		case 0xf:
-			model = &op_amd_spec;
-			/* Actually it could be i386/hammer too, but give
-			   user space an consistent name. */
+			/*
+			 * Actually it could be i386/hammer too, but
+			 * give user space an consistent name.
+			 */
 			cpu_type = "x86-64/hammer";
 			break;
 		case 0x10:
-			model = &op_amd_spec;
 			cpu_type = "x86-64/family10";
 			break;
 		case 0x11:
-			model = &op_amd_spec;
 			cpu_type = "x86-64/family11h";
 			break;
+		default:
+			return -ENODEV;
 		}
+		model = &op_amd_spec;
 		break;
 
 	case X86_VENDOR_INTEL:
--
cgit v1.2.3


From ff9faa8b676e195476b86f03fe58db0f01bda8f3 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 22 May 2009 15:36:29 +0200
Subject: x86/oprofile: move common macros to op_x86_model.h

There are duplicate macro implementations in model specific code. This
patch moves all common macros to op_x86_model.h.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c  | 8 --------
 arch/x86/oprofile/op_model_p4.c   | 2 --
 arch/x86/oprofile/op_model_ppro.c | 8 --------
 arch/x86/oprofile/op_x86_model.h  | 9 +++++++++
 4 files changed, 9 insertions(+), 18 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b54c0880b7d..4b9254a67e6 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,22 +26,14 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
 #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
 #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR_LO(x) (x &= (1<<21))
 #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
 #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
 #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
 #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 819b131fd75..420c15e7123 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -366,8 +366,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
 #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index ae581196688..a922a1a815c 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -26,19 +26,11 @@
 static int num_counters = 2;
 static int counter_width = 32;
 
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
 #define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
 static u64 *reset_value;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index bd8157d12ff..c80ec7d0999 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -11,6 +11,15 @@
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H
 
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+
 struct op_saved_msr {
 	unsigned int high;
 	unsigned int low;
--
cgit v1.2.3


From d2731a4387ad6c6bca07abfe9ed41d450fb6d665 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 22 May 2009 19:47:38 +0200
Subject: x86/oprofile: remove MSR macros for AMD cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4b9254a67e6..c6181c265ae 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,12 +26,7 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
 #define CTRL_CLEAR_LO(x) (x &= (1<<21))
 #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
 #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
@@ -101,17 +96,17 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
-		CTRL_READ(low, high, msrs, i);
+		rdmsr(msrs->controls[i].addr, low, high);
 		CTRL_CLEAR_LO(low);
 		CTRL_CLEAR_HI(high);
-		CTRL_WRITE(low, high, msrs, i);
+		wrmsr(msrs->controls[i].addr, low, high);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
 			continue;
-		CTR_WRITE(1, msrs, i);
+		wrmsr(msrs->counters[i].addr, -1, -1);
 	}
 
 	/* enable active counters */
@@ -119,9 +114,9 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
 		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 
-			CTR_WRITE(counter_config[i].count, msrs, i);
+			wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
 
-			CTRL_READ(low, high, msrs, i);
+			rdmsr(msrs->controls[i].addr, low, high);
 			CTRL_CLEAR_LO(low);
 			CTRL_CLEAR_HI(high);
 			CTRL_SET_ENABLE(low);
@@ -133,7 +128,7 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
 			CTRL_SET_HOST_ONLY(high, 0);
 			CTRL_SET_GUEST_ONLY(high, 0);
 
-			CTRL_WRITE(low, high, msrs, i);
+			wrmsr(msrs->controls[i].addr, low, high);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -267,10 +262,10 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
+		rdmsr(msrs->counters[i].addr, low, high);
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
-			CTR_WRITE(reset_value[i], msrs, i);
+			wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
 		}
 	}
 
@@ -286,9 +281,9 @@ static void op_amd_start(struct op_msrs const * const msrs)
 {
 	unsigned int low, high;
 	int i;
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (reset_value[i]) {
-			CTRL_READ(low, high, msrs, i);
+			rdmsr(msrs->controls[i].addr, low, high);
 			CTRL_SET_ACTIVE(low);
-			CTRL_WRITE(low, high, msrs, i);
+			wrmsr(msrs->controls[i].addr, low, high);
 		}
 	}
 
@@ -307,9 +302,9 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTRL_READ(low, high, msrs, i);
+		rdmsr(msrs->controls[i].addr, low, high);
 		CTRL_SET_INACTIVE(low);
-		CTRL_WRITE(low, high, msrs, i);
+		wrmsr(msrs->controls[i].addr, low, high);
 	}
 
 	op_amd_stop_ibs();
--
cgit v1.2.3


From 74c9a5c341bb1f6cbb5095b07c77230f19682ce8 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 22 May 2009 19:47:38 +0200
Subject: x86/oprofile: remove MSR macros for ppro cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_ppro.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index a922a1a815c..6c5d288c566 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -27,9 +27,6 @@ static int num_counters = 2;
 static int counter_width = 32;
 
 #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
-#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
 #define CTRL_CLEAR(x) (x &= (1<<21))
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
@@ -88,9 +85,9 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	for (i = 0 ; i < num_counters; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
-		CTRL_READ(low, high, msrs, i);
+		rdmsr(msrs->controls[i].addr, low, high);
 		CTRL_CLEAR(low);
-		CTRL_WRITE(low, high, msrs, i);
+		wrmsr(msrs->controls[i].addr, low, high);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
@@ -107,14 +104,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 
 			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 
-			CTRL_READ(low, high, msrs, i);
+			rdmsr(msrs->controls[i].addr, low, high);
 			CTRL_CLEAR(low);
 			CTRL_SET_ENABLE(low);
 			CTRL_SET_USR(low, counter_config[i].user);
 			CTRL_SET_KERN(low, counter_config[i].kernel);
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_EVENT(low, counter_config[i].event);
-			CTRL_WRITE(low, high, msrs, i);
+			wrmsr(msrs->controls[i].addr, low, high);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -162,9 +159,9 @@ static void ppro_start(struct op_msrs const * const msrs)
 		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
-			CTRL_READ(low, high, msrs, i);
+			rdmsr(msrs->controls[i].addr, low, high);
 			CTRL_SET_ACTIVE(low);
-			CTRL_WRITE(low, high, msrs, i);
+			wrmsr(msrs->controls[i].addr, low, high);
 		}
 	}
 }
@@ -180,9 +177,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTRL_READ(low, high, msrs, i);
+		rdmsr(msrs->controls[i].addr, low, high);
 		CTRL_SET_INACTIVE(low);
-		CTRL_WRITE(low, high, msrs, i);
+		wrmsr(msrs->controls[i].addr, low, high);
 	}
 }
--
cgit v1.2.3


From 1131a478245b00664ae2dbc0f68db987b51fa806 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 20:23:23 +0200
Subject: x86/oprofile: remove MSR macros for p4 cpus

The macros CTRL_READ() and CTRL_WRITE() make the code hard to read and
maintain. This patch replaces them by rdmsr()/wrmsr() functions and
simplifies the code.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_p4.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 420c15e7123..365d8a9c03d 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -350,8 +350,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
-#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 
 #define CCCR_RESERVED_BITS 0x38030FFF
 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -361,13 +359,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
-#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 
@@ -513,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 		if (ev->bindings[i].virt_counter & counter_bit) {
 
 			/* modify ESCR */
-			ESCR_READ(escr, high, ev, i);
+			rdmsr(ev->bindings[i].escr_address, escr, high);
 			ESCR_CLEAR(escr);
 			if (stag == 0) {
 				ESCR_SET_USR_0(escr, counter_config[ctr].user);
@@ -524,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 			}
 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
 			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
-			ESCR_WRITE(escr, high, ev, i);
+			wrmsr(ev->bindings[i].escr_address, escr, high);
 
 			/* modify CCCR */
-			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+			      cccr, high);
 			CCCR_CLEAR(cccr);
 			CCCR_SET_REQUIRED_BITS(cccr);
 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
@@ -535,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 				CCCR_SET_PMI_OVF_0(cccr);
 			else
 				CCCR_SET_PMI_OVF_1(cccr);
-			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+			      cccr, high);
 			return;
 		}
 	}
@@ -582,7 +578,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
-			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+			wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
+			      -(u32)counter_config[i].count, -1);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -622,14 +619,16 @@ static int p4_check_ctrs(struct pt_regs * const regs,
 
 		real = VIRT_CTR(stag, i);
 
-		CCCR_READ(low, high, real);
-		CTR_READ(ctr, high, real);
+		rdmsr(p4_counters[real].cccr_address, low, high);
+		rdmsr(p4_counters[real].counter_address, ctr, high);
 		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
 			oprofile_add_sample(regs, i);
-			CTR_WRITE(reset_value[i], real);
+			wrmsr(p4_counters[real].counter_address,
+			      -(u32)reset_value[i], -1);
 			CCCR_CLEAR_OVF(low);
-			CCCR_WRITE(low, high, real);
-			CTR_WRITE(reset_value[i], real);
+			wrmsr(p4_counters[real].cccr_address, low, high);
+			wrmsr(p4_counters[real].counter_address,
+			      -(u32)reset_value[i], -1);
 		}
 	}
 
@@ -651,9 +650,9 @@ static void p4_start(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_SET_ENABLE(low);
-		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 }
 
@@ -668,9 +667,9 @@ static void p4_stop(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_SET_DISABLE(low);
-		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 }
--
cgit v1.2.3


From ec064c093e254f4433afb17dcef7f964c76436af Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 15:05:50 +0200
Subject: x86/oprofile: fix and cleanup CTRL_SET_* macros

This patch fixes missing braces around macro parameters. Macro
definitions from intel_arch_perfmon.h are used where possible.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_ppro.c |  1 -
 arch/x86/oprofile/op_x86_model.h  | 18 ++++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 6c5d288c566..61ee8f64053 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,6 @@
 #include
 #include
 #include
-#include <asm/intel_arch_perfmon.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index c80ec7d0999..a207b1c46e2 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -11,14 +11,16 @@
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H
 
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+#include <asm/intel_arch_perfmon.h>
+
+#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
+#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
+#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
+#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT)
+#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)
+#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0))
+#define CTRL_SET_USR(val, u) ((val) |= ((u) ? ARCH_PERFMON_EVENTSEL_USR : 0))
+#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
 
 struct op_saved_msr {
 	unsigned int high;
--
cgit v1.2.3


From 9c59354b48ce9cf28048b02fea73dd0236f876ea Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 18:16:43 +0200
Subject: x86/oprofile: remove unused macros for AMD virtualization profiling

The use of the macros has no effect. The oprofilefs has to be extended
first to support these features.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 5 -----
 1 file changed, 5 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c6181c265ae..aaa7ffaed6b 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -31,8 +31,6 @@
 #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
 #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
 #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
-#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
-#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
 
 static unsigned long reset_value[NUM_COUNTERS];
 
@@ -125,9 +123,6 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_EVENT_LOW(low, counter_config[i].event);
 			CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
-			CTRL_SET_HOST_ONLY(high, 0);
-			CTRL_SET_GUEST_ONLY(high, 0);
-
 			wrmsr(msrs->controls[i].addr, low, high);
 		} else {
 			reset_value[i] = 0;
--
cgit v1.2.3


From ef8828ddf828174785421af67c281144d4b8e796 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 19:31:44 +0200
Subject: x86/oprofile: pass the model to setup_ctrs() functions

In follow-on patches the setup_ctrs() functions will need data that
describes the model. This patch extends the function argument list to
pass a pointer to the model to these functions.
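
A sketch of a model implementation under the new signature (the name
example_setup_ctrs is hypothetical; the signature itself matches the
op_x86_model.h change below):

	/* sketch only: a model's setup_ctrs() after this patch */
	static void example_setup_ctrs(struct op_x86_model_spec const *model,
				       struct op_msrs const * const msrs)
	{
		/* model-specific data, e.g. the reserved-bits mask added
		 * in a follow-on patch, is now reachable via 'model' */
	}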
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c       | 2 +-
 arch/x86/oprofile/op_model_amd.c  | 3 ++-
 arch/x86/oprofile/op_model_p4.c   | 3 ++-
 arch/x86/oprofile/op_model_ppro.c | 3 ++-
 arch/x86/oprofile/op_x86_model.h  | 3 ++-
 5 files changed, 9 insertions(+), 5 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index ae0ab03959b..c31f87bbf43 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -125,7 +125,7 @@ static void nmi_cpu_setup(void *dummy)
 	int cpu = smp_processor_id();
 	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 	spin_lock(&oprofilefs_lock);
-	model->setup_ctrs(msrs);
+	model->setup_ctrs(model, msrs);
 	spin_unlock(&oprofilefs_lock);
 	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index aaa7ffaed6b..86e0a01ba12 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -85,7 +85,8 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 }
 
-static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
+static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
+			      struct op_msrs const * const msrs)
 {
 	unsigned int low, high;
 	int i;
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 365d8a9c03d..05ba0287b1f 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -542,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 }
 
-static void p4_setup_ctrs(struct op_msrs const * const msrs)
+static void p4_setup_ctrs(struct op_x86_model_spec const *model,
+			  struct op_msrs const * const msrs)
 {
 	unsigned int i;
 	unsigned int low, high;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 61ee8f64053..40b44ee521d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -51,7 +51,8 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 }
 
-static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
+			    struct op_msrs const * const msrs)
 {
 	unsigned int low, high;
 	int i;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index a207b1c46e2..6161c7f0e7f 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -48,7 +48,8 @@ struct op_x86_model_spec {
 	int (*init)(struct oprofile_operations *ops);
 	void (*exit)(void);
 	void (*fill_in_addresses)(struct op_msrs * const msrs);
-	void (*setup_ctrs)(struct op_msrs const * const msrs);
+	void (*setup_ctrs)(struct op_x86_model_spec const *model,
+			   struct op_msrs const * const msrs);
 	int (*check_ctrs)(struct pt_regs * const regs,
 			  struct op_msrs const * const msrs);
 	void (*start)(struct op_msrs const * const msrs);
--
cgit v1.2.3


From 3370d358569755625aba4d9a846a040ce691d9ed Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 15:10:32 +0200
Subject: x86/oprofile: replace macros to calculate control register

This patch introduces op_x86_get_ctrl() to calculate the value of the
performance control register. This is generic code usable for all
models. The event and reserved masks are model specific and stored in
struct op_x86_model_spec. 64 bit MSR functions are used now. The patch
removes many hard-to-read macros used for ctrl calculation. The
function op_x86_get_ctrl() is common code and the first step to further
merge performance counter implementations for x86 models.
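
A worked example with a hypothetical configuration (event 0x76,
unit_mask 0, user = kernel = 1, an AMD-style event_mask of 0x0FFF),
following the helper's code below:

	val  = ARCH_PERFMON_EVENTSEL_INT;	/* bit 20 -> 0x100000 */
	val |= ARCH_PERFMON_EVENTSEL_USR;	/* bit 16 -> 0x010000 */
	val |= ARCH_PERFMON_EVENTSEL_OS;	/* bit 17 -> 0x020000 */
	val |= 0x76 & 0xFF;			/* low event bits      */
	val |= (0x76 & 0x0F00) << 24;		/* high event bits (0) */
	/* val == 0x130076 */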
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c       | 20 +++++++++++++++++++
 arch/x86/oprofile/op_model_amd.c  | 41 +++++++++++++++------------------------
 arch/x86/oprofile/op_model_ppro.c | 29 +++++++++++++--------------
 arch/x86/oprofile/op_x86_model.h  | 15 ++++++++++----
 4 files changed, 60 insertions(+), 45 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index c31f87bbf43..388ee15e0e4 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
 /* 0 == registered but off, 1 == registered and on */
 static int nmi_enabled = 0;
 
+/* common functions */
+
+u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+		    struct op_counter_config *counter_config)
+{
+	u64 val = 0;
+	u16 event = (u16)counter_config->event;
+
+	val |= ARCH_PERFMON_EVENTSEL_INT;
+	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
+	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
+	val |= (counter_config->unit_mask & 0xFF) << 8;
+	event &= model->event_mask ? model->event_mask : 0xFF;
+	val |= event & 0xFF;
+	val |= (event & 0x0F00) << 24;
+
+	return val;
+}
+
+
 static int profile_exceptions_notify(struct notifier_block *self,
 				     unsigned long val, void *data)
 {
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 86e0a01ba12..2406ab86360 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -25,12 +25,11 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
+#define OP_EVENT_MASK 0x0FFF
+
+#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-#define CTRL_CLEAR_LO(x) (x &= (1<<21))
-#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
-#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
 
 static unsigned long reset_value[NUM_COUNTERS];
 
@@ -84,21 +83,19 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 	}
 }
 
-
 static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 			      struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
-		rdmsr(msrs->controls[i].addr, low, high);
-		CTRL_CLEAR_LO(low);
-		CTRL_CLEAR_HI(high);
-		wrmsr(msrs->controls[i].addr, low, high);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= model->reserved;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
@@ -112,19 +109,11 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
-			wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
-
-			rdmsr(msrs->controls[i].addr, low, high);
-			CTRL_CLEAR_LO(low);
-			CTRL_CLEAR_HI(high);
-			CTRL_SET_ENABLE(low);
-			CTRL_SET_USR(low, counter_config[i].user);
-			CTRL_SET_KERN(low, counter_config[i].kernel);
-			CTRL_SET_UM(low, counter_config[i].unit_mask);
-			CTRL_SET_EVENT_LOW(low, counter_config[i].event);
-			CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
-			wrmsr(msrs->controls[i].addr, low, high);
+			wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
+			rdmsrl(msrs->controls[i].addr, val);
+			val &= model->reserved;
+			val |= op_x86_get_ctrl(model, &counter_config[i]);
+			wrmsrl(msrs->controls[i].addr, val);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -486,14 +475,16 @@ static void op_amd_exit(void) {}
 #endif /* CONFIG_OPROFILE_IBS */
 
 struct op_x86_model_spec const op_amd_spec = {
-	.init = op_amd_init,
-	.exit = op_amd_exit,
 	.num_counters = NUM_COUNTERS,
 	.num_controls = NUM_CONTROLS,
+	.reserved = MSR_AMD_EVENTSEL_RESERVED,
+	.event_mask = OP_EVENT_MASK,
+	.init = op_amd_init,
+	.exit = op_amd_exit,
 	.fill_in_addresses = &op_amd_fill_in_addresses,
 	.setup_ctrs = &op_amd_setup_ctrs,
 	.check_ctrs = &op_amd_check_ctrs,
 	.start = &op_amd_start,
 	.stop = &op_amd_stop,
-	.shutdown = &op_amd_shutdown
+	.shutdown = &op_amd_shutdown,
 };
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 40b44ee521d..3092f998baf 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -10,6 +10,7 @@
  * @author Philippe Elie
  * @author Graydon Hoare
  * @author Andi Kleen
+ * @author Robert Richter
  */
 
 #include
@@ -26,8 +27,8 @@
 static int num_counters = 2;
 static int counter_width = 32;
 
 #define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_EVENT(val, e) (val |= e)
+
+#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
 
 static u64 *reset_value;
 
@@ -54,7 +55,7 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 			    struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value) {
@@ -85,9 +86,9 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	for (i = 0 ; i < num_counters; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
-		rdmsr(msrs->controls[i].addr, low, high);
-		CTRL_CLEAR(low);
-		wrmsr(msrs->controls[i].addr, low, high);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= model->reserved;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
@@ -101,17 +102,11 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	for (i = 0; i < num_counters; ++i) {
 		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
-			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-
-			rdmsr(msrs->controls[i].addr, low, high);
-			CTRL_CLEAR(low);
-			CTRL_SET_ENABLE(low);
-			CTRL_SET_USR(low, counter_config[i].user);
-			CTRL_SET_KERN(low, counter_config[i].kernel);
-			CTRL_SET_UM(low, counter_config[i].unit_mask);
-			CTRL_SET_EVENT(low, counter_config[i].event);
-			wrmsr(msrs->controls[i].addr, low, high);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
+			rdmsrl(msrs->controls[i].addr, val);
+			val &= model->reserved;
+			val |= op_x86_get_ctrl(model, &counter_config[i]);
+			wrmsrl(msrs->controls[i].addr, val);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -205,6 +200,7 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 struct op_x86_model_spec const op_ppro_spec = {
 	.num_counters = 2,
 	.num_controls = 2,
+	.reserved = MSR_PPRO_EVENTSEL_RESERVED,
 	.fill_in_addresses = &ppro_fill_in_addresses,
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
@@ -249,6 +245,7 @@ static int arch_perfmon_init(struct oprofile_operations *ignore)
 }
 
 struct op_x86_model_spec op_arch_perfmon_spec = {
+	.reserved = MSR_PPRO_EVENTSEL_RESERVED,
 	.init = &arch_perfmon_init,
 	/* num_counters/num_controls filled in at runtime */
 	.fill_in_addresses = &ppro_fill_in_addresses,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 6161c7f0e7f..3220d4ce632 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -6,21 +6,19 @@
  * @remark Read the file COPYING
 *
 * @author Graydon Hoare
+ * @author Robert Richter
 */
 
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H
 
+#include <asm/types.h>
 #include <asm/intel_arch_perfmon.h>
 
 #define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
 #define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
 #define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_ENABLE(val) ((val) |= ARCH_PERFMON_EVENTSEL_INT)
 #define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_KERN(val, k) ((val) |= ((k) ? ARCH_PERFMON_EVENTSEL_OS : 0))
-#define CTRL_SET_USR(val, u) ((val) |= ((u) ? ARCH_PERFMON_EVENTSEL_USR : 0))
-#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
 
 struct op_saved_msr {
 	unsigned int high;
@@ -39,12 +37,16 @@ struct op_msrs {
 
 struct pt_regs;
 
+struct oprofile_operations;
+
 /* The model vtable abstracts the differences between
 * various x86 CPU models' perfctr support.
 */
 struct op_x86_model_spec {
 	unsigned int	num_counters;
 	unsigned int	num_controls;
+	u64		reserved;
+	u16		event_mask;
 	int		(*init)(struct oprofile_operations *ops);
 	void		(*exit)(void);
 	void		(*fill_in_addresses)(struct op_msrs * const msrs);
@@ -57,6 +59,11 @@ struct op_x86_model_spec {
 	void		(*shutdown)(struct op_msrs const * const msrs);
 };
 
+struct op_counter_config;
+
+extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+			   struct op_counter_config *counter_config);
+
 extern struct op_x86_model_spec const op_ppro_spec;
--
cgit v1.2.3


From 42399adb239d4f1413899cc618ecf640779e79df Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 17:59:06 +0200
Subject: x86/oprofile: replace CTR_OVERFLOWED macros

The patch replaces all CTR_OVERFLOWED macros. 64 bit MSR functions and
64 bit counter values are used now. Thus, it will be easier to later
extend the models to use counters wider than 32 bits.
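
The test is easy to misread: counters count upwards from a negative
reset value, so the overflow bit still being set means no wrap has
happened yet. The resulting pattern, as used in the AMD hunk below:

	rdmsrl(msrs->counters[i].addr, val);
	if (val & OP_CTR_OVERFLOW)	/* bit set: no overflow yet */
		continue;
	oprofile_add_sample(regs, i);	/* bit clear: counter wrapped */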
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c  | 16 ++++++++--------
 arch/x86/oprofile/op_model_p4.c   |  6 +++---
 arch/x86/oprofile/op_model_ppro.c | 10 ++++------
 3 files changed, 15 insertions(+), 17 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2406ab86360..b5d678fbf03 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -26,11 +26,10 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
 #define OP_EVENT_MASK 0x0FFF
+#define OP_CTR_OVERFLOW (1ULL<<31)
 
 #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
 static unsigned long reset_value[NUM_COUNTERS];
 
 #ifdef CONFIG_OPROFILE_IBS
@@ -241,17 +240,18 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
 			     struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
 		if (!reset_value[i])
 			continue;
-		rdmsr(msrs->counters[i].addr, low, high);
-		if (CTR_OVERFLOWED(low)) {
-			oprofile_add_sample(regs, i);
-			wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
-		}
+		rdmsrl(msrs->counters[i].addr, val);
+		/* bit is clear if overflowed: */
+		if (val & OP_CTR_OVERFLOW)
+			continue;
+		oprofile_add_sample(regs, i);
+		wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1);
 	}
 
 	op_amd_handle_ibs(regs, msrs);
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 05ba0287b1f..ac4ca28b9ed 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -32,6 +32,8 @@
 #define NUM_CCCRS_HT2 9
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 
+#define OP_CTR_OVERFLOW (1ULL<<31)
+
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static unsigned int num_controls = NUM_CONTROLS_NON_HT;
 
@@ -362,8 +364,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
-
 
 /* this assigns a "stagger" to the current CPU, which is used throughout
    the code in this module as an extra array offset, to select the "even"
@@ -622,7 +622,7 @@ static int p4_check_ctrs(struct pt_regs * const regs,
 		rdmsr(p4_counters[real].cccr_address, low, high);
 		rdmsr(p4_counters[real].counter_address, ctr, high);
-		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
 			oprofile_add_sample(regs, i);
 			wrmsr(p4_counters[real].counter_address,
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3092f998baf..82db396dc3e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -26,8 +26,6 @@
 static int num_counters = 2;
 static int counter_width = 32;
 
-#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
 #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
 
 static u64 *reset_value;
@@ -124,10 +122,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 		if (!reset_value[i])
 			continue;
 		rdmsrl(msrs->counters[i].addr, val);
-		if (CTR_OVERFLOWED(val)) {
-			oprofile_add_sample(regs, i);
-			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-		}
+		if (val & (1ULL << (counter_width - 1)))
+			continue;
+		oprofile_add_sample(regs, i);
+		wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 	}
 
 	/* Only P6 based Pentium M need to re-unmask the apic vector but it
--
cgit v1.2.3


From dea3766ca052a4f572b16a23a322553c064d75af Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 18:11:52 +0200
Subject: x86/oprofile: replace CTRL_SET_*ACTIVE macros

The patch replaces all CTRL_SET_*ACTIVE macros. 64 bit MSR functions
and 64 bit counter values are used now. The code uses bit masks from
<asm/intel_arch_perfmon.h>.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c  | 16 ++++++++--------
 arch/x86/oprofile/op_model_ppro.c | 16 ++++++++--------
 arch/x86/oprofile/op_x86_model.h  |  2 --
 3 files changed, 16 insertions(+), 18 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b5d678fbf03..4ac9d283e8d 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -262,13 +262,13 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
 
 static void op_amd_start(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (reset_value[i]) {
-			rdmsr(msrs->controls[i].addr, low, high);
-			CTRL_SET_ACTIVE(low);
-			wrmsr(msrs->controls[i].addr, low, high);
+			rdmsrl(msrs->controls[i].addr, val);
+			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(msrs->controls[i].addr, val);
 		}
 	}
 
@@ -277,7 +277,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
 
 static void op_amd_stop(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	/*
@@ -287,9 +287,9 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (!reset_value[i])
 			continue;
-		rdmsr(msrs->controls[i].addr, low, high);
-		CTRL_SET_INACTIVE(low);
-		wrmsr(msrs->controls[i].addr, low, high);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
 	op_amd_stop_ibs();
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 82db396dc3e..566b43f0b6c 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -145,16 +145,16 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 
 static void ppro_start(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value)
 		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
-			rdmsr(msrs->controls[i].addr, low, high);
-			CTRL_SET_ACTIVE(low);
-			wrmsr(msrs->controls[i].addr, low, high);
+			rdmsrl(msrs->controls[i].addr, val);
+			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(msrs->controls[i].addr, val);
 		}
 	}
 }
@@ -162,7 +162,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 
 static void ppro_stop(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value)
@@ -170,9 +170,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		rdmsr(msrs->controls[i].addr, low, high);
-		CTRL_SET_INACTIVE(low);
-		wrmsr(msrs->controls[i].addr, low, high);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 }
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 3220d4ce632..1c4577795a9 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -17,8 +17,6 @@
 #define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
 #define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
-#define CTRL_SET_ACTIVE(val) ((val) |= ARCH_PERFMON_EVENTSEL0_ENABLE)
-#define CTRL_SET_INACTIVE(val) ((val) &= ~ARCH_PERFMON_EVENTSEL0_ENABLE)
 
 struct op_saved_msr {
 	unsigned int high;
--
cgit v1.2.3


From 217d3cfb959756cb493fc03106c0253baa420ce8 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 4 Jun 2009 02:36:44 +0200
Subject: x86/oprofile: replace CTR*_IS_RESERVED macros

The patch replaces all CTR*_IS_RESERVED macros.

Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c  | 10 +++++-----
 arch/x86/oprofile/op_model_p4.c   | 10 +++++-----
 arch/x86/oprofile/op_model_ppro.c | 10 +++++-----
 arch/x86/oprofile/op_x86_model.h  |  3 ---
 4 files changed, 15 insertions(+), 18 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4ac9d283e8d..c5c5eec2fa7 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -90,7 +90,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
 		val &= model->reserved;
@@ -99,14 +99,14 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->counters[i].addr))
 			continue;
 		wrmsr(msrs->counters[i].addr, -1, -1);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->counters[i].addr) {
 			reset_value[i] = counter_config[i].count;
 			wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1);
 			rdmsrl(msrs->controls[i].addr, val);
@@ -300,11 +300,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
 	}
 	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
 	}
 }
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac4ca28b9ed..9db0ca9af76 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -559,7 +559,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
@@ -569,14 +569,14 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all escrs (including those outside our concern) */
 	for (i = num_counters; i < num_controls; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->controls[i].addr) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address,
@@ -679,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(msrs->counters[i].addr);
 	}
 	/*
@@ -688,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
 	 * This saves a few bits.
	 */
 	for (i = num_counters ; i < num_controls ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(msrs->controls[i].addr);
 	}
 }
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 566b43f0b6c..0a261a5c696 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -82,7 +82,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0 ; i < num_counters; ++i) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
 		val &= model->reserved;
@@ -91,14 +91,14 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < num_counters; ++i) {
-		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->counters[i].addr))
 			continue;
 		wrmsrl(msrs->counters[i].addr, -1LL);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < num_counters; ++i) {
-		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->counters[i].addr) {
 			reset_value[i] = counter_config[i].count;
 			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 			rdmsrl(msrs->controls[i].addr, val);
@@ -181,11 +181,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
 	}
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
 	}
 	if (reset_value) {
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 1c4577795a9..69f1eb46e1b 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -15,9 +15,6 @@
 #include <asm/types.h>
 #include <asm/intel_arch_perfmon.h>
 
-#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
-#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
-
 struct op_saved_msr {
 	unsigned int high;
 	unsigned int low;
--
cgit v1.2.3


From bbc5986d2db427fdd61b6116ff8b9ed988e663a8 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Mon, 25 May 2009 17:38:19 +0200
Subject: x86/oprofile: use 64 bit wrmsr functions

This patch replaces some wrmsr() functions with wrmsrl().
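
Both forms program the same 64-bit MSR value; for a 32-bit count the
equivalence used in the hunks below is (msr and count are placeholder
names):

	wrmsr(msr, -(unsigned int)count, -1);	/* explicit low/high halves */
	wrmsrl(msr, -(s64)count);		/* one sign-extended write   */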
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 ++++--- arch/x86/oprofile/op_model_p4.c | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c5c5eec2fa7..9bf90176241 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -101,14 +101,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!msrs->counters[i].addr)) continue; - wrmsr(msrs->counters[i].addr, -1, -1); + wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; - wrmsr(msrs->counters[i].addr, -(unsigned int)counter_config[i].count, -1); + wrmsrl(msrs->counters[i].addr, + -(s64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); @@ -251,7 +252,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); - wrmsr(msrs->counters[i].addr, -(unsigned int)reset_value[i], -1); + wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); } op_amd_handle_ibs(regs, msrs); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9db0ca9af76..f01e53b118f 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -579,8 +579,8 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); - wrmsr(p4_counters[VIRT_CTR(stag, i)].counter_address, - -(u32)counter_config[i].count, -1); + wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, + -(s64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -624,12 +624,12 @@ static int p4_check_ctrs(struct pt_regs * const regs, rdmsr(p4_counters[real].counter_address, ctr, high); if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); - wrmsr(p4_counters[real].counter_address, - -(u32)reset_value[i], -1); + wrmsrl(p4_counters[real].counter_address, + -(s64)reset_value[i]); CCCR_CLEAR_OVF(low); wrmsr(p4_counters[real].cccr_address, low, high); - wrmsr(p4_counters[real].counter_address, - -(u32)reset_value[i], -1); + wrmsrl(p4_counters[real].counter_address, + -(s64)reset_value[i]); } } -- cgit v1.2.3 From 95e74e62c1540b1115fe8cec5b592f22960f2bb2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 19:09:27 +0200 Subject: x86/oprofile: use 64 bit values to save MSR states This patch removes struct op_saved_msr and replaces it with a u64 variable. This simplifies the code and makes it possible to use the 64 bit MSR functions.
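As a minimal sketch of the effect (mirroring the hunks below; details may differ): with the u64 field, saving or restoring a register becomes a single rdmsrl()/wrmsrl() per MSR instead of a split rdmsr()/wrmsr():

	struct op_msr {
		unsigned long	addr;
		u64		saved;	/* was: struct op_saved_msr { high; low; } */
	};

	rdmsrl(counters[i].addr, counters[i].saved);	/* save */
	wrmsrl(counters[i].addr, counters[i].saved);	/* restore */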
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 28 ++++++++-------------------- arch/x86/oprofile/op_x86_model.h | 9 ++------- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 388ee15e0e4..3b84b789de0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -78,19 +78,13 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) unsigned int i; for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + if (counters[i].addr) + rdmsrl(counters[i].addr, counters[i].saved); } for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + if (controls[i].addr) + rdmsrl(controls[i].addr, controls[i].saved); } } @@ -204,19 +198,13 @@ static void nmi_restore_registers(struct op_msrs *msrs) unsigned int i; for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + if (controls[i].addr) + wrmsrl(controls[i].addr, controls[i].saved); } for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + if (counters[i].addr) + wrmsrl(counters[i].addr, counters[i].saved); } } diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 69f1eb46e1b..fda52b4c1b9 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -15,14 +15,9 @@ #include #include -struct op_saved_msr { - unsigned int high; - unsigned int low; -}; - struct op_msr { - unsigned long addr; - struct op_saved_msr saved; + unsigned long addr; + u64 saved; }; struct op_msrs { -- cgit v1.2.3 From 1a245c45343651a87ff63afc5ddeb8e24d731835 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 5 Jun 2009 15:54:24 +0200 Subject: x86/oprofile: remove some local variables in MSR save/restore functions The patch removes some local variables in these functions. 
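In sketch form (see the hunks below), the loops now read the model fields directly instead of caching them in const locals:

	/* before */
	unsigned int const nr_ctrs = model->num_counters;
	for (i = 0; i < nr_ctrs; ++i) ...

	/* after */
	for (i = 0; i < model->num_counters; ++i) ...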
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3b84b789de0..80b63d5db50 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -71,18 +71,16 @@ static int profile_exceptions_notify(struct notifier_block *self, static void nmi_cpu_save_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrs; ++i) { + for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) rdmsrl(counters[i].addr, counters[i].saved); } - for (i = 0; i < nr_ctrls; ++i) { + for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) rdmsrl(controls[i].addr, controls[i].saved); } @@ -191,18 +189,16 @@ static int nmi_setup(void) static void nmi_restore_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrls; ++i) { + for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) wrmsrl(controls[i].addr, controls[i].saved); } - for (i = 0; i < nr_ctrs; ++i) { + for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) wrmsrl(counters[i].addr, counters[i].saved); } -- cgit v1.2.3 From c572ae4efd1b0a5cc76c5e6aae05c1b182b6a69c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:10:39 +0200 Subject: x86/oprofile: use 64 bit values in IBS functions The IBS code internally uses 32 bit values (a low and a high value) to represent a 64 bit value. This patch switches the code to 64 bit values throughout, so that the 64 bit MSR functions can be used. No functional changes.
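To illustrate with the fetch valid bit (a sketch using the masks defined below): bit 49 of IbsFetchCtl no longer has to be addressed as bit 17 of the high half:

	/* before: split access */
	rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
	if (high & IBS_FETCH_HIGH_VALID_BIT)	/* (1UL << 17) */
		...

	/* after: one 64 bit access */
	rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
	if (ctl & IBS_FETCH_VAL)		/* (1ULL << 49) */
		...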
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 131 ++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 70 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9bf90176241..6493ef7ae9a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -35,16 +35,18 @@ static unsigned long reset_value[NUM_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ -#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ -#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ -#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL /*IbsOpCtl bits */ -#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ -#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) -#define IBS_FETCH_SIZE 6 -#define IBS_OP_SIZE 12 +#define IBS_FETCH_SIZE 6 +#define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ @@ -126,66 +128,63 @@ static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - u32 low, high; - u64 msr; + u64 val, ctl; struct op_entry entry; if (!has_ibs) return 1; if (ibs_config.fetch_enabled) { - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_HIGH_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + if (ctl & IBS_FETCH_VAL) { + rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); + oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - oprofile_add_data(&entry, low); - oprofile_add_data(&entry, high); - rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data(&entry, (u32)ctl); + oprofile_add_data(&entry, (u32)(ctl >> 32)); + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); oprofile_write_commit(&entry); /* reenable the IRQ */ - high &= ~IBS_FETCH_HIGH_VALID_BIT; - high |= IBS_FETCH_HIGH_ENABLE; - low &= IBS_FETCH_LOW_MAX_CNT_MASK; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); + ctl |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_LOW_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSOPRIP, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSOPCTL, ctl); + if (ctl & IBS_OP_VAL) { + rdmsrl(MSR_AMD64_IBSOPRIP, val); + oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA2, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA3, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCLINAD, msr); 
- oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA2, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSOPDATA3, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSDCLINAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); + oprofile_add_data(&entry, (u32)val); + oprofile_add_data(&entry, (u32)(val >> 32)); oprofile_write_commit(&entry); /* reenable the IRQ */ - high = 0; - low &= ~IBS_OP_LOW_VALID_BIT; - low |= IBS_OP_LOW_ENABLE; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; + ctl |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } @@ -194,39 +193,31 @@ op_amd_handle_ibs(struct pt_regs * const regs, static inline void op_amd_start_ibs(void) { - unsigned int low, high; + u64 val; if (has_ibs && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ - + IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; + val |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) - + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ - + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; + val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + val |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { - unsigned int low, high; - if (has_ibs && ibs_config.fetch_enabled) { + if (has_ibs && ibs_config.fetch_enabled) /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); - if (has_ibs && ibs_config.op_enabled) { + if (has_ibs && ibs_config.op_enabled) /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } + wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else -- cgit v1.2.3 From 51563a0e5650d0d76539625388d72d62b34c726e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:54:56 +0200 Subject: x86/oprofile: introduce oprofile_add_data64() The IBS implementation writes 64 bit register values to the cpu buffer by writing two 32 bit values using oprofile_add_data(). This patch introduces oprofile_add_data64() to write a single 64 bit value to the buffer.
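A plausible sketch of the new helper (the actual implementation in drivers/oprofile may handle buffer space and error reporting differently):

	int oprofile_add_data64(struct op_entry *entry, u64 val)
	{
		/* still stored as two 32 bit words; callers now make one call */
		if (!oprofile_add_data(entry, (u32)val))
			return 0;
		return oprofile_add_data(entry, (u32)(val >> 32));
	}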
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6493ef7ae9a..cc930467575 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -140,13 +140,10 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); - oprofile_add_data(&entry, (u32)ctl); - oprofile_add_data(&entry, (u32)(ctl >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ @@ -162,23 +159,17 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ -- cgit v1.2.3 From 802070f5474af1a49435a9528aede47bb18abd47 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 12 Jun 2009 18:32:07 +0200 Subject: x86/oprofile: fix initialization of arch_perfmon for core_i7 Commit: e419294 x86/oprofile: moving arch_perfmon counter setup to op_x86_model_spec.init introduced a bug in the initialization of core_i7, causing the model to be incorrectly set to &op_ppro_spec. This patch fixes this.
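The bug in sketch form: ppro_init() selected the model for core_i7 inside the switch, but the unconditional assignment at the end of the function clobbered it again:

	case 26:
		model = &op_arch_perfmon_spec;	/* selected here ... */
		*cpu_type = "i386/core_i7";
		break;
	...
	model = &op_ppro_spec;			/* ... then overwritten here */

The fix below keeps the selection in a local that defaults to &op_ppro_spec and assigns model exactly once.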
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7826dfcc842..28ee490c1b8 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -406,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; @@ -432,7 +433,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: - model = &op_arch_perfmon_spec; + spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: @@ -443,7 +444,7 @@ static int __init ppro_init(char **cpu_type) return 0; } - model = &op_ppro_spec; + model = spec; return 1; } -- cgit v1.2.3 From c64b04fe6e0cb7c78e01751a44ef56cf20344e87 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 14 Jun 2009 00:59:50 +0530 Subject: x86, cpu: cpu/proc.c display cache alignment and address sizes for 32 bit 32 bit kernels also have access to x86_cache_alignment, x86_phys_bits and x86_virt_bits; make them available to user space just as on 64 bit. Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1244921390.11733.30.camel@ht.satnam> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/proc.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e30397246..f82706a3901 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); #endif seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); -#ifdef CONFIG_X86_64 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", c->x86_phys_bits, c->x86_virt_bits); -#endif seq_printf(m, "power management:"); for (i = 0; i < 32; i++) { -- cgit v1.2.3 From ac5672f82c39ff2f8dce81bf3e68b1dfc41f366f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 14 Apr 2009 14:29:44 -0700 Subject: x86/paravirt: split paravirt definitions into paravirt_types.h Split the monolithic asm/paravirt.h into separate paravirt.h (inlines and other "active" definitions), and paravirt_types.h (types, constants and other "passive" definitions). This makes it easier to use the type/constant definitions without pulling in everything else and causing circular dependency problems. [ Impact: cleanup ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/paravirt.h | 711 +-------------------------------- arch/x86/include/asm/paravirt_types.h | 720 ++++++++++++++++++++++++++++++++++ 2 files changed, 721 insertions(+), 710 deletions(-) create mode 100644 arch/x86/include/asm/paravirt_types.h (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8a083..6a07af432c8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,689 +7,11 @@ #include #include -/* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0 -#define CLBR_EAX (1 << 0) -#define CLBR_ECX (1 << 1) -#define CLBR_EDX (1 << 2) -#define CLBR_EDI (1 << 3) - -#ifdef CONFIG_X86_32 -/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ -#define CLBR_ANY ((1 << 4) - 1) - -#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#define CLBR_SCRATCH (0) -#else -#define CLBR_RAX CLBR_EAX -#define CLBR_RCX CLBR_ECX -#define CLBR_RDX CLBR_EDX -#define CLBR_RDI CLBR_EDI -#define CLBR_RSI (1 << 4) -#define CLBR_R8 (1 << 5) -#define CLBR_R9 (1 << 6) -#define CLBR_R10 (1 << 7) -#define CLBR_R11 (1 << 8) - -#define CLBR_ANY ((1 << 9) - 1) - -#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ - CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX) -#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) - -#include -#endif /* X86_64 */ - -#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) +#include #ifndef __ASSEMBLY__ #include #include -#include -#include - -struct page; -struct thread_struct; -struct desc_ptr; -struct tss_struct; -struct mm_struct; -struct desc_struct; -struct task_struct; - -/* - * Wrapper type for pointers to code which uses the non-standard - * calling convention. See PV_CALL_SAVE_REGS_THUNK below. - */ -struct paravirt_callee_save { - void *func; -}; - -/* general info */ -struct pv_info { - unsigned int kernel_rpl; - int shared_kernel_pmd; - int paravirt_enabled; - const char *name; -}; - -struct pv_init_ops { - /* - * Patch may replace one of the defined code sequences with - * arbitrary code, subject to the same register constraints. - * This generally means the code is not free to clobber any - * registers other than EAX. The patch function should return - * the number of bytes of code generated, as we nop pad the - * rest in generic code. - */ - unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, - unsigned long addr, unsigned len); - - /* Basic arch-specific setup */ - void (*arch_setup)(void); - char *(*memory_setup)(void); - void (*post_allocator_init)(void); - - /* Print a banner to identify the environment */ - void (*banner)(void); -}; - - -struct pv_lazy_ops { - /* Set deferred update mode, used for batching operations. 
*/ - void (*enter)(void); - void (*leave)(void); -}; - -struct pv_time_ops { - void (*time_init)(void); - - /* Set and set time of day */ - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long); - - unsigned long long (*sched_clock)(void); - unsigned long (*get_tsc_khz)(void); -}; - -struct pv_cpu_ops { - /* hooks for various privileged instructions */ - unsigned long (*get_debugreg)(int regno); - void (*set_debugreg)(int regno, unsigned long value); - - void (*clts)(void); - - unsigned long (*read_cr0)(void); - void (*write_cr0)(unsigned long); - - unsigned long (*read_cr4_safe)(void); - unsigned long (*read_cr4)(void); - void (*write_cr4)(unsigned long); - -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - - /* Segment descriptor handling */ - void (*load_tr_desc)(void); - void (*load_gdt)(const struct desc_ptr *); - void (*load_idt)(const struct desc_ptr *); - void (*store_gdt)(struct desc_ptr *); - void (*store_idt)(struct desc_ptr *); - void (*set_ldt)(const void *desc, unsigned entries); - unsigned long (*store_tr)(void); - void (*load_tls)(struct thread_struct *t, unsigned int cpu); -#ifdef CONFIG_X86_64 - void (*load_gs_index)(unsigned int idx); -#endif - void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, - const void *desc); - void (*write_gdt_entry)(struct desc_struct *, - int entrynum, const void *desc, int size); - void (*write_idt_entry)(gate_desc *, - int entrynum, const gate_desc *gate); - void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); - void (*free_ldt)(struct desc_struct *ldt, unsigned entries); - - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); - - void (*set_iopl_mask)(unsigned mask); - - void (*wbinvd)(void); - void (*io_delay)(void); - - /* cpuid emulation, mostly so that caps bits can be disabled */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); - - /* MSR, PMC and TSR operations. - err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ - u64 (*read_msr_amd)(unsigned int msr, int *err); - u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, unsigned low, unsigned high); - - u64 (*read_tsc)(void); - u64 (*read_pmc)(int counter); - unsigned long long (*read_tscp)(unsigned int *aux); - - /* - * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) - */ - void (*irq_enable_sysexit)(void); - - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - - /* - * Switch to usermode gs and return to 32-bit usermode using - * sysret. Used to return to 32-on-64 compat processes. - * Other usermode register state, including %esp, must already - * be restored. - */ - void (*usergs_sysret32)(void); - - /* Normal iret. Jump to this with the standard iret stack - frame set up. */ - void (*iret)(void); - - void (*swapgs)(void); - - void (*start_context_switch)(struct task_struct *prev); - void (*end_context_switch)(struct task_struct *next); -}; - -struct pv_irq_ops { - void (*init_IRQ)(void); - - /* - * Get/set interrupt state. 
save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. - * - * NOTE: These functions callers expect the callee to preserve - * more registers than the standard C calling convention. - */ - struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; - struct paravirt_callee_save irq_disable; - struct paravirt_callee_save irq_enable; - - void (*safe_halt)(void); - void (*halt)(void); - -#ifdef CONFIG_X86_64 - void (*adjust_exception_frame)(void); -#endif -}; - -struct pv_apic_ops { -#ifdef CONFIG_X86_LOCAL_APIC - void (*setup_boot_clock)(void); - void (*setup_secondary_clock)(void); - - void (*startup_ipi_hook)(int phys_apicid, - unsigned long start_eip, - unsigned long start_esp); -#endif -}; - -struct pv_mmu_ops { - /* - * Called before/after init_mm pagetable setup. setup_start - * may reset %cr3, and may pre-install parts of the pagetable; - * pagetable setup is expected to preserve any existing - * mapping. - */ - void (*pagetable_setup_start)(pgd_t *pgd_base); - void (*pagetable_setup_done)(pgd_t *pgd_base); - - unsigned long (*read_cr2)(void); - void (*write_cr2)(unsigned long); - - unsigned long (*read_cr3)(void); - void (*write_cr3)(unsigned long); - - /* - * Hooks for intercepting the creation/use/destruction of an - * mm_struct. - */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); - void (*exit_mmap)(struct mm_struct *mm); - - - /* TLB operations */ - void (*flush_tlb_user)(void); - void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - struct mm_struct *mm, - unsigned long va); - - /* Hooks for allocating and freeing a pagetable top-level */ - int (*pgd_alloc)(struct mm_struct *mm); - void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); - - /* - * Hooks for allocating/releasing pagetable pages when they're - * attached to a pagetable - */ - void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); - void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); - void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); - void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); - void (*release_pte)(unsigned long pfn); - void (*release_pmd)(unsigned long pfn); - void (*release_pud)(unsigned long pfn); - - /* Pagetable manipulation functions */ - void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); - void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*pte_update)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); - - pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); - - struct paravirt_callee_save pte_val; - struct paravirt_callee_save make_pte; - - struct paravirt_callee_save pgd_val; - struct paravirt_callee_save make_pgd; - -#if PAGETABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pmd_clear)(pmd_t *pmdp); - -#endif /* CONFIG_X86_PAE */ - - void (*set_pud)(pud_t *pudp, pud_t pudval); - - 
struct paravirt_callee_save pmd_val; - struct paravirt_callee_save make_pmd; - -#if PAGETABLE_LEVELS == 4 - struct paravirt_callee_save pud_val; - struct paravirt_callee_save make_pud; - - void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); -#endif /* PAGETABLE_LEVELS == 4 */ -#endif /* PAGETABLE_LEVELS >= 3 */ - -#ifdef CONFIG_HIGHPTE - void *(*kmap_atomic_pte)(struct page *page, enum km_type type); -#endif - - struct pv_lazy_ops lazy_mode; - - /* dom0 ops */ - - /* Sometimes the physical address is a pfn, and sometimes its - an mfn. We can tell which is which from the index. */ - void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, - phys_addr_t phys, pgprot_t flags); -}; - -struct raw_spinlock; -struct pv_lock_ops { - int (*spin_is_locked)(struct raw_spinlock *lock); - int (*spin_is_contended)(struct raw_spinlock *lock); - void (*spin_lock)(struct raw_spinlock *lock); - void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); - int (*spin_trylock)(struct raw_spinlock *lock); - void (*spin_unlock)(struct raw_spinlock *lock); -}; - -/* This contains all the paravirt structures: we get a convenient - * number for each function using the offset which we use to indicate - * what to patch. */ -struct paravirt_patch_template { - struct pv_init_ops pv_init_ops; - struct pv_time_ops pv_time_ops; - struct pv_cpu_ops pv_cpu_ops; - struct pv_irq_ops pv_irq_ops; - struct pv_apic_ops pv_apic_ops; - struct pv_mmu_ops pv_mmu_ops; - struct pv_lock_ops pv_lock_ops; -}; - -extern struct pv_info pv_info; -extern struct pv_init_ops pv_init_ops; -extern struct pv_time_ops pv_time_ops; -extern struct pv_cpu_ops pv_cpu_ops; -extern struct pv_irq_ops pv_irq_ops; -extern struct pv_apic_ops pv_apic_ops; -extern struct pv_mmu_ops pv_mmu_ops; -extern struct pv_lock_ops pv_lock_ops; - -#define PARAVIRT_PATCH(x) \ - (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) - -#define paravirt_type(op) \ - [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(op)) -#define paravirt_clobber(clobber) \ - [paravirt_clobber] "i" (clobber) - -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - " .short " clobber "\n" \ - ".popsection\n" - -/* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") - -/* Simple instruction patching code. 
*/ -#define DEF_NATIVE(ops, name, code) \ - extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") - -unsigned paravirt_patch_nop(void); -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_ignore(unsigned len); -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len); -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len); -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len); - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end); - -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len); - -int paravirt_disable_iospace(void); - -/* - * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. - */ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" - -/* - * These macros are intended to wrap calls through one of the paravirt - * ops structs, so that they can be later identified and patched at - * runtime. - * - * Normally, a call to a pv_op function is a simple indirect call: - * (pv_op_struct.operations)(args...). - * - * Unfortunately, this is a relatively slow operation for modern CPUs, - * because it cannot necessarily determine what the destination - * address is. In this case, the address is a runtime constant, so at - * the very least we can patch the call to e a simple direct call, or - * ideally, patch an inline implementation into the callsite. (Direct - * calls are essentially free, because the call and return addresses - * are completely predictable.) - * - * For i386, these macros rely on the standard gcc "regparm(3)" calling - * convention, in which the first three arguments are placed in %eax, - * %edx, %ecx (in that order), and the remaining arguments are placed - * on the stack. All caller-save registers (eax,edx,ecx) are expected - * to be modified (either clobbered or used for return values). - * X86_64, on the other hand, already specifies a register-based calling - * conventions, returning at %rax, with parameteres going on %rdi, %rsi, - * %rdx, and %rcx. Note that for this reason, x86_64 does not need any - * special handling for dealing with 4 arguments, unlike i386. - * However, x86_64 also have to clobber all caller saved registers, which - * unfortunately, are quite a bit (r8 - r11) - * - * The call instruction itself is marked by placing its start address - * and size into the .parainstructions section, so that - * apply_paravirt() in arch/i386/kernel/alternative.c can do the - * appropriate patching under the control of the backend pv_init_ops - * implementation. - * - * Unfortunately there's no way to get gcc to generate the args setup - * for the call, and then allow the call itself to be generated by an - * inline asm. Because of this, we must do the complete arg setup and - * return value handling from within these macros. This is fairly - * cumbersome. - * - * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
- * It could be extended to more arguments, but there would be little - * to be gained from that. For each number of arguments, there are - * the two VCALL and CALL variants for void and non-void functions. - * - * When there is a return value, the invoker of the macro must specify - * the return type. The macro then uses sizeof() on that type to - * determine whether its a 32 or 64 bit value, and places the return - * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). For x86_64 machines, it just returns at %rax regardless of - * the return value size. - * - * 64-bit arguments are passed as a pair of adjacent 32-bit arguments - * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments - * in low,high order - * - * Small structures are passed and returned in registers. The macro - * calling convention can't directly deal with this, so the wrapper - * functions must do this. - * - * These PVOP_* macros are only defined within this header. This - * means that all uses must be wrapped in inline functions. This also - * makes sure the incoming and outgoing types are always correct. - */ -#ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - -#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) -#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) -#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) - -#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ - "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS - -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS - -#define EXTRA_CLOBBERS -#define VEXTRA_CLOBBERS -#else /* CONFIG_X86_64 */ -#define PVOP_VCALL_ARGS \ - unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax - -#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) -#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) -#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) -#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) - -#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ - "=S" (__esi), "=d" (__edx), \ - "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) - -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS - -#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" -#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_PARAVIRT_DEBUG -#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) -#else -#define PVOP_TEST_NULL(op) ((void)op) -#endif - -#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ - pre, post, ...) \ - ({ \ - rettype __ret; \ - PVOP_CALL_ARGS; \ - PVOP_TEST_NULL(op); \ - /* This is 32-bit specific, but is okay in 64-bit */ \ - /* since this condition will never hold */ \ - if (sizeof(rettype) > sizeof(unsigned long)) { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ - } else { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)__eax; \ - } \ - __ret; \ - }) - -#define __PVOP_CALL(rettype, op, pre, post, ...) 
\ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ - EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) - -#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - -#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ - ({ \ - PVOP_VCALL_ARGS; \ - PVOP_TEST_NULL(op); \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - }) - -#define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ - VEXTRA_CLOBBERS, \ - pre, post, ##__VA_ARGS__) - -#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - - -#define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op, "", "") -#define PVOP_VCALL0(op) \ - __PVOP_VCALL(op, "", "") - -#define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op, "", "") -#define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op, "", "") - - -#define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) - -#define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) -#define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) - - -#define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - -#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALLEE2(op, arg1, arg2) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - - -#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -#define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) - -/* This is the only difference in x86_64. 
We can make it much simpler */ -#ifdef CONFIG_X86_32 -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) -#else -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#endif static inline int paravirt_enabled(void) { @@ -1393,20 +715,6 @@ static inline void pmd_clear(pmd_t *pmdp) } #endif /* CONFIG_X86_PAE */ -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); - #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { @@ -1437,12 +745,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, pv_mmu_ops.set_fixmap(idx, phys, flags); } -void _paravirt_nop(void); -u32 _paravirt_ident_32(u32); -u64 _paravirt_ident_64(u64); - -#define paravirt_nop ((void *)_paravirt_nop) - #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static inline int __raw_spin_is_locked(struct raw_spinlock *lock) @@ -1479,17 +781,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) #endif -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 instrtype; /* type of this instruction */ - u8 len; /* length of original instruction */ - u16 clobbers; /* what registers you may clobber */ -}; - -extern struct paravirt_patch_site __parainstructions[], - __parainstructions_end[]; - #ifdef CONFIG_X86_32 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;" #define PV_RESTORE_REGS "popl %edx; popl %ecx;" diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h new file mode 100644 index 00000000000..2b3371bae29 --- /dev/null +++ b/arch/x86/include/asm/paravirt_types.h @@ -0,0 +1,720 @@ +#ifndef _ASM_X86_PARAVIRT_TYPES_H +#define _ASM_X86_PARAVIRT_TYPES_H + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_NONE 0 +#define CLBR_EAX (1 << 0) +#define CLBR_ECX (1 << 1) +#define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) + +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) +#define CLBR_SCRATCH (0) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) +#define CLBR_R8 (1 << 5) +#define CLBR_R9 (1 << 6) +#define CLBR_R10 (1 << 7) +#define CLBR_R11 (1 << 8) + +#define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + +#endif /* X86_64 */ + +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + +#ifndef __ASSEMBLY__ + +#include +#include + +struct page; +struct thread_struct; +struct desc_ptr; +struct tss_struct; +struct mm_struct; +struct desc_struct; +struct task_struct; +struct cpumask; + +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + +/* general info */ +struct pv_info { + unsigned int kernel_rpl; + int shared_kernel_pmd; + int paravirt_enabled; + const char *name; +}; + +struct pv_init_ops { + /* + * Patch may replace one of the defined code sequences with + * arbitrary code, subject to the same register constraints. + * This generally means the code is not free to clobber any + * registers other than EAX. The patch function should return + * the number of bytes of code generated, as we nop pad the + * rest in generic code. + */ + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned long addr, unsigned len); + + /* Basic arch-specific setup */ + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*post_allocator_init)(void); + + /* Print a banner to identify the environment */ + void (*banner)(void); +}; + + +struct pv_lazy_ops { + /* Set deferred update mode, used for batching operations. 
*/ + void (*enter)(void); + void (*leave)(void); +}; + +struct pv_time_ops { + void (*time_init)(void); + + /* Set and set time of day */ + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + + unsigned long long (*sched_clock)(void); + unsigned long (*get_tsc_khz)(void); +}; + +struct pv_cpu_ops { + /* hooks for various privileged instructions */ + unsigned long (*get_debugreg)(int regno); + void (*set_debugreg)(int regno, unsigned long value); + + void (*clts)(void); + + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); + + unsigned long (*read_cr4_safe)(void); + unsigned long (*read_cr4)(void); + void (*write_cr4)(unsigned long); + +#ifdef CONFIG_X86_64 + unsigned long (*read_cr8)(void); + void (*write_cr8)(unsigned long); +#endif + + /* Segment descriptor handling */ + void (*load_tr_desc)(void); + void (*load_gdt)(const struct desc_ptr *); + void (*load_idt)(const struct desc_ptr *); + void (*store_gdt)(struct desc_ptr *); + void (*store_idt)(struct desc_ptr *); + void (*set_ldt)(const void *desc, unsigned entries); + unsigned long (*store_tr)(void); + void (*load_tls)(struct thread_struct *t, unsigned int cpu); +#ifdef CONFIG_X86_64 + void (*load_gs_index)(unsigned int idx); +#endif + void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, + const void *desc); + void (*write_gdt_entry)(struct desc_struct *, + int entrynum, const void *desc, int size); + void (*write_idt_entry)(gate_desc *, + int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); + + void (*set_iopl_mask)(unsigned mask); + + void (*wbinvd)(void); + void (*io_delay)(void); + + /* cpuid emulation, mostly so that caps bits can be disabled */ + void (*cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + /* MSR, PMC and TSR operations. + err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (*read_msr_amd)(unsigned int msr, int *err); + u64 (*read_msr)(unsigned int msr, int *err); + int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + u64 (*read_tsc)(void); + u64 (*read_pmc)(int counter); + unsigned long long (*read_tscp)(unsigned int *aux); + + /* + * Atomically enable interrupts and return to userspace. This + * is only ever used to return to 32-bit processes; in a + * 64-bit kernel, it's used for 32-on-64 compat processes, but + * never native 64-bit processes. (Jump, not call.) + */ + void (*irq_enable_sysexit)(void); + + /* + * Switch to usermode gs and return to 64-bit usermode using + * sysret. Only used in 64-bit kernels to return to 64-bit + * processes. Usermode register state, including %rsp, must + * already be restored. + */ + void (*usergs_sysret64)(void); + + /* + * Switch to usermode gs and return to 32-bit usermode using + * sysret. Used to return to 32-on-64 compat processes. + * Other usermode register state, including %esp, must already + * be restored. + */ + void (*usergs_sysret32)(void); + + /* Normal iret. Jump to this with the standard iret stack + frame set up. */ + void (*iret)(void); + + void (*swapgs)(void); + + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); +}; + +struct pv_irq_ops { + void (*init_IRQ)(void); + + /* + * Get/set interrupt state. 
save_fl and restore_fl are only + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. + */ + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + + void (*safe_halt)(void); + void (*halt)(void); + +#ifdef CONFIG_X86_64 + void (*adjust_exception_frame)(void); +#endif +}; + +struct pv_apic_ops { +#ifdef CONFIG_X86_LOCAL_APIC + void (*setup_boot_clock)(void); + void (*setup_secondary_clock)(void); + + void (*startup_ipi_hook)(int phys_apicid, + unsigned long start_eip, + unsigned long start_esp); +#endif +}; + +struct pv_mmu_ops { + /* + * Called before/after init_mm pagetable setup. setup_start + * may reset %cr3, and may pre-install parts of the pagetable; + * pagetable setup is expected to preserve any existing + * mapping. + */ + void (*pagetable_setup_start)(pgd_t *pgd_base); + void (*pagetable_setup_done)(pgd_t *pgd_base); + + unsigned long (*read_cr2)(void); + void (*write_cr2)(unsigned long); + + unsigned long (*read_cr3)(void); + void (*write_cr3)(unsigned long); + + /* + * Hooks for intercepting the creation/use/destruction of an + * mm_struct. + */ + void (*activate_mm)(struct mm_struct *prev, + struct mm_struct *next); + void (*dup_mmap)(struct mm_struct *oldmm, + struct mm_struct *mm); + void (*exit_mmap)(struct mm_struct *mm); + + + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, + unsigned long va); + + /* Hooks for allocating and freeing a pagetable top-level */ + int (*pgd_alloc)(struct mm_struct *mm); + void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); + + /* + * Hooks for allocating/releasing pagetable pages when they're + * attached to a pagetable + */ + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); + + /* Pagetable manipulation functions */ + void (*set_pte)(pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); + void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (*pte_update)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + + pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; + + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pmd_clear)(pmd_t *pmdp); + +#endif /* CONFIG_X86_PAE */ + + void (*set_pud)(pud_t *pudp, pud_t pudval); + + 
struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; + +#if PAGETABLE_LEVELS == 4 + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; + + void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); +#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* PAGETABLE_LEVELS >= 3 */ + +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + + struct pv_lazy_ops lazy_mode; + + /* dom0 ops */ + + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags); +}; + +struct raw_spinlock; +struct pv_lock_ops { + int (*spin_is_locked)(struct raw_spinlock *lock); + int (*spin_is_contended)(struct raw_spinlock *lock); + void (*spin_lock)(struct raw_spinlock *lock); + void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); + int (*spin_trylock)(struct raw_spinlock *lock); + void (*spin_unlock)(struct raw_spinlock *lock); +}; + +/* This contains all the paravirt structures: we get a convenient + * number for each function using the offset which we use to indicate + * what to patch. */ +struct paravirt_patch_template { + struct pv_init_ops pv_init_ops; + struct pv_time_ops pv_time_ops; + struct pv_cpu_ops pv_cpu_ops; + struct pv_irq_ops pv_irq_ops; + struct pv_apic_ops pv_apic_ops; + struct pv_mmu_ops pv_mmu_ops; + struct pv_lock_ops pv_lock_ops; +}; + +extern struct pv_info pv_info; +extern struct pv_init_ops pv_init_ops; +extern struct pv_time_ops pv_time_ops; +extern struct pv_cpu_ops pv_cpu_ops; +extern struct pv_irq_ops pv_irq_ops; +extern struct pv_apic_ops pv_apic_ops; +extern struct pv_mmu_ops pv_mmu_ops; +extern struct pv_lock_ops pv_lock_ops; + +#define PARAVIRT_PATCH(x) \ + (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) + +#define paravirt_type(op) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ + [paravirt_opptr] "i" (&(op)) +#define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +/* + * Generate some code, and mark it as patchable by the + * apply_paravirt() alternate instruction patcher. + */ +#define _paravirt_alt(insn_string, type, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 771b\n" \ + " .byte " type "\n" \ + " .byte 772b-771b\n" \ + " .short " clobber "\n" \ + ".popsection\n" + +/* Generate patchable code, with the default asm parameters. */ +#define paravirt_alt(insn_string) \ + _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") + +/* Simple instruction patching code. 
*/ +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + +unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); +unsigned paravirt_patch_ignore(unsigned len); +unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, + unsigned long addr, u16 site_clobbers, + unsigned len); +unsigned paravirt_patch_jmp(void *insnbuf, const void *target, + unsigned long addr, unsigned len); +unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len); + +unsigned paravirt_patch_insns(void *insnbuf, unsigned len, + const char *start, const char *end); + +unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len); + +int paravirt_disable_iospace(void); + +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. + */ +#define PARAVIRT_CALL "call *%c[paravirt_opptr];" + +/* + * These macros are intended to wrap calls through one of the paravirt + * ops structs, so that they can be later identified and patched at + * runtime. + * + * Normally, a call to a pv_op function is a simple indirect call: + * (pv_op_struct.operations)(args...). + * + * Unfortunately, this is a relatively slow operation for modern CPUs, + * because it cannot necessarily determine what the destination + * address is. In this case, the address is a runtime constant, so at + * the very least we can patch the call to e a simple direct call, or + * ideally, patch an inline implementation into the callsite. (Direct + * calls are essentially free, because the call and return addresses + * are completely predictable.) + * + * For i386, these macros rely on the standard gcc "regparm(3)" calling + * convention, in which the first three arguments are placed in %eax, + * %edx, %ecx (in that order), and the remaining arguments are placed + * on the stack. All caller-save registers (eax,edx,ecx) are expected + * to be modified (either clobbered or used for return values). + * X86_64, on the other hand, already specifies a register-based calling + * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * %rdx, and %rcx. Note that for this reason, x86_64 does not need any + * special handling for dealing with 4 arguments, unlike i386. + * However, x86_64 also have to clobber all caller saved registers, which + * unfortunately, are quite a bit (r8 - r11) + * + * The call instruction itself is marked by placing its start address + * and size into the .parainstructions section, so that + * apply_paravirt() in arch/i386/kernel/alternative.c can do the + * appropriate patching under the control of the backend pv_init_ops + * implementation. + * + * Unfortunately there's no way to get gcc to generate the args setup + * for the call, and then allow the call itself to be generated by an + * inline asm. Because of this, we must do the complete arg setup and + * return value handling from within these macros. This is fairly + * cumbersome. + * + * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
+ * It could be extended to more arguments, but there would be little + * to be gained from that. For each number of arguments, there are + * the two VCALL and CALL variants for void and non-void functions. + * + * When there is a return value, the invoker of the macro must specify + * the return type. The macro then uses sizeof() on that type to + * determine whether its a 32 or 64 bit value, and places the return + * in the right register(s) (just %eax for 32-bit, and %edx:%eax for + * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * the return value size. + * + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments + * in low,high order + * + * Small structures are passed and returned in registers. The macro + * calling convention can't directly deal with this, so the wrapper + * functions must do this. + * + * These PVOP_* macros are only defined within this header. This + * means that all uses must be wrapped in inline functions. This also + * makes sure the incoming and outgoing types are always correct. + */ +#ifdef CONFIG_X86_32 +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + +#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + +#define EXTRA_CLOBBERS +#define VEXTRA_CLOBBERS +#else /* CONFIG_X86_64 */ +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + +#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ + "=S" (__esi), "=d" (__edx), \ + "=c" (__ecx) +#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + +#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" +#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_PARAVIRT_DEBUG +#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +#else +#define PVOP_TEST_NULL(op) ((void)op) +#endif + +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ + pre, post, ...) \ + ({ \ + rettype __ret; \ + PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + /* This is 32-bit specific, but is okay in 64-bit */ \ + /* since this condition will never hold */ \ + if (sizeof(rettype) > sizeof(unsigned long)) { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : call_clbr \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ + } else { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : call_clbr \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + __ret = (rettype)__eax; \ + } \ + __ret; \ + }) + +#define __PVOP_CALL(rettype, op, pre, post, ...) 
\ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ + ({ \ + PVOP_VCALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ + : call_clbr \ + : paravirt_type(op), \ + paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + }) + +#define __PVOP_VCALL(op, pre, post, ...) \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, \ + pre, post, ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + + +#define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") +#define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op, "", "") +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op, "", "") + + +#define PVOP_CALL1(rettype, op, arg1) \ + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALL1(op, arg1) \ + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + + +#define PVOP_CALL2(rettype, op, arg1, arg2) \ + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALL2(op, arg1, arg2) \ + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALLEE2(op, arg1, arg2) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + + +#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) +#define PVOP_VCALL3(op, arg1, arg2, arg3) \ + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) + +/* This is the only difference in x86_64. 
We can make it much simpler */ +#ifdef CONFIG_X86_32 +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ + "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) +#else +#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) +#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) +#endif + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + +void paravirt_enter_lazy_mmu(void); +void paravirt_leave_lazy_mmu(void); + +void _paravirt_nop(void); +u32 _paravirt_ident_32(u32); +u64 _paravirt_ident_64(u64); + +#define paravirt_nop ((void *)_paravirt_nop) + +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch_site { + u8 *instr; /* original instructions */ + u8 instrtype; /* type of this instruction */ + u8 len; /* length of original instruction */ + u16 clobbers; /* what registers you may clobber */ +}; + +extern struct paravirt_patch_site __parainstructions[], + __parainstructions_end[]; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_PARAVIRT_TYPES_H */ -- cgit v1.2.3 From e6e9cac8c3417b43498b243c1f8f11780e157168 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:40:59 -0700 Subject: x86: split out core __math_state_restore Split the core fpu state restoration out into __math_state_restore, which assumes that cr0.TS is clear and that the fpu context has been initialized. This will be used during context switch. There are two reasons this is desirable: - There's a small clarification: when __switch_to() calls math_state_restore, it relies on the fact that tsk_used_math() returns true, and so will never do a blocking init_fpu(). __math_state_restore() does not have (or need) that logic, so the question never arises. - It allows the clts() to be moved earlier in __switch_to() so it can be performed while cpu context updates are batched (will be done in a later patch). 
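To make the intended division of labor concrete, here is a minimal sketch of the two entry points after the split (illustrative only; the exact bodies are in the hunks below, and the context-switch caller is only introduced by the follow-on patches):

	/*
	 * Trap path: may lazily allocate and initialize the FPU state,
	 * and clears cr0.TS itself before restoring.
	 */
	asmlinkage void math_state_restore(void)
	{
		/* tsk_used_math()/init_fpu() handling elided */
		clts();		/* allow math ops (or we recurse) */
		__math_state_restore();
	}

	/*
	 * Context-switch path, inside __switch_to(): the caller has
	 * already cleared cr0.TS and knows the context is initialized,
	 * so there is no risk of a blocking init_fpu() here.
	 */
	if (preload_fpu)
		__math_state_restore();
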
[ Impact: refactor code to make reuse cleaner; no functional change ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/include/asm/i387.h | 1 + arch/x86/kernel/traps.c | 33 +++++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 175adf58dd4..2e7529295f5 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -26,6 +26,7 @@ extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); +extern void __math_state_restore(void); extern void init_thread_xstate(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5f935f0d586..71b91669ad1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -813,6 +813,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) { } +/* + * __math_state_restore assumes that cr0.TS is already clear and the + * fpu state is all ready for use. Used during context switch. + */ +void __math_state_restore(void) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; + + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(tsk))) { + stts(); + force_sig(SIGSEGV, tsk); + return; + } + + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task @@ -844,17 +866,8 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - stts(); - force_sig(SIGSEGV, tsk); - return; - } - thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ - tsk->fpu_counter++; + __math_state_restore(); } EXPORT_SYMBOL_GPL(math_state_restore); -- cgit v1.2.3 From 2fcddce10f6771cfa0c56fd1e826d50d67d100b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:45:26 -0700 Subject: x86-32: make sure clts is batched during context switch If we're preloading the fpu state during context switch, make sure the clts happens while we're batching the cpu context update, then do the actual __math_state_restore once the updates are flushed. This allows more efficient context switches when running paravirtualized, as all the hypercalls can be folded together into one. [ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_32.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984a..a80eddd4165 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + bool preload_fpu; /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ - __unlazy_fpu(prev_p); + /* + * If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; + __unlazy_fpu(prev_p); /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) + if (preload_fpu) prefetch(next->xstate); /* @@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); + /* If we're going to preload the fpu context, make sure clts + is run while we're batching the cpu state updates. */ + if (preload_fpu) + clts(); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - * - * tsk_used_math() checks prevent calling math_state_restore(), - * which can sleep in the case of !tsk_used_math() - */ - if (tsk_used_math(next_p) && next_p->fpu_counter > 5) - math_state_restore(); + if (preload_fpu) + __math_state_restore(); /* * Restore %gs if needed (which is common) -- cgit v1.2.3 From 16d9dbf0c2bd167fdd942b83592d59696c7b73bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:50:27 -0700 Subject: x86-64: move unlazy_fpu() into lazy cpu state part of context switch Make sure that unlazy_fpu()'s stts gets batched along with the other cpu state changes during context switch. (32-bit already does this.) This makes sure it gets batched when running paravirtualized. [ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9..c9b8904736d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -419,6 +419,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) load_TLS(next, cpu); + /* Must be after DS reload */ + unlazy_fpu(prev_p); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -459,9 +462,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; - /* Must be after DS reload */ - unlazy_fpu(prev_p); - /* * Switch the PDA and FPU contexts. */ -- cgit v1.2.3 From 17950c5b243f99cbabef173415ee988c52104d7e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 01:01:01 -0700 Subject: x86-64: move clts into batch cpu state updates when preloading fpu When a task is likely to be using the fpu, we preload its state during the context switch, rather than waiting for it to run an fpu instruction. Make sure the clts() happens while we're doing batched fpu state updates to optimise paravirtualized context switches. 
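Putting this together with the two previous patches, the ordering inside the 64-bit __switch_to() comes out roughly as sketched below (a reconstruction from the hunks, not verbatim kernel code; arch_end_context_switch() is the flush point named in the 32-bit patch):

	load_TLS(next, cpu);
	unlazy_fpu(prev_p);		/* stts() for the outgoing task, batched */
	if (preload_fpu)
		clts();			/* re-enable the FPU while still batching */
	arch_end_context_switch(next_p);	/* one flush for all batched updates */
	/* ... segment, stack and PDA switching ... */
	if (preload_fpu)
		__math_state_restore();	/* cr0.TS already clear: no trap taken */
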
[ Impact: optimise paravirtual FPU context switch ] Signed-off-by: Jeremy Fitzhardinge Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/kernel/process_64.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c9b8904736d..a28279dbb07 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; + bool preload_fpu; + + /* + * If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) + if (preload_fpu) prefetch(next->xstate); /* @@ -422,6 +430,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); + /* Make sure cpu is ready for new context */ + if (preload_fpu) + clts(); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - * - * tsk_used_math() checks prevent calling math_state_restore(), - * which can sleep in the case of !tsk_used_math() + /* + * Preload the FPU context, now that we've determined that the + * task is likely to be using it. */ - if (tsk_used_math(next_p) && next_p->fpu_counter > 5) - math_state_restore(); + if (preload_fpu) + __math_state_restore(); return prev_p; } -- cgit v1.2.3 From 03b56ce54143a3a69d4fea6ff8130b1c903a47ce Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 18 Jun 2009 19:52:59 +0800 Subject: crypto: des_s390 - Permit weak keys unless REQ_WEAK_KEY set I just started running FIPS CAVS test vectors through an s390x system for giggles, and discovered that an earlier des3 patch permitting weak keys had missed s390's arch-specific des3 implementation. This change adds the same flag tweaks as ad79cdd77fc1466e45cf923890f66bcfe7c43f12 (crypto: des3_ede - permit weak keys unless REQ_WEAK_KEY set) to s390's des3 implementation, which now yields the expected test results. 
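For context, a sketch of the caller-visible behaviour this aligns with the generic des3_ede code (hedged: the crypto_cipher_* helpers and the "des3_ede" algorithm name are assumed from the crypto API of this era, and k1/k3 stand for arbitrary caller-supplied DES subkeys; none of this appears in the hunks below):

	struct crypto_cipher *tfm = crypto_alloc_cipher("des3_ede", 0, 0);
	u8 key[3 * DES_KEY_SIZE];
	int err;

	memcpy(key, k1, DES_KEY_SIZE);
	memcpy(key + DES_KEY_SIZE, k1, DES_KEY_SIZE);	/* repeated subkey: weak */
	memcpy(key + 2 * DES_KEY_SIZE, k3, DES_KEY_SIZE);

	/* Without CRYPTO_TFM_REQ_WEAK_KEY the weak key is now accepted. */
	err = crypto_cipher_setkey(tfm, key, sizeof(key));

	/* With it, setkey refuses the key and reports why. */
	crypto_cipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	/* err == -EINVAL, and crypto_cipher_get_flags(tfm) now has
	 * CRYPTO_TFM_RES_WEAK_KEY set */
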
Signed-off-by: Jarod Wilson Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 4aba83b3159..2bc479ab3a6 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -250,8 +250,9 @@ static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key, const u8 *temp_key = key; u32 *flags = &tfm->crt_flags; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; + if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) { @@ -411,9 +412,9 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE))) { - - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; + DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) { -- cgit v1.2.3 From 21e70878215f620fe99ea7d7c74bc641aeec932f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 18 Jun 2009 17:09:27 +0530 Subject: x86: oprofile/op_model_amd.c set return values for op_amd_handle_ibs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit op_amd_handle_ibs() should return 0 when IBS is not present or not defined. Fix compilation warning: CC [M] arch/x86/oprofile/op_model_amd.o arch/x86/oprofile/op_model_amd.c: In function ‘op_amd_handle_ibs’: arch/x86/oprofile/op_model_amd.c:217: warning: no return statement in function returning non-void Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index cc930467575..e95268eb922 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -132,7 +132,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 1; + return 0; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -214,7 +214,10 @@ static void op_amd_stop_ibs(void) #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) { } + struct op_msrs const * const msrs) +{ + return 0; +} static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.2.3 From 4adc667593f83a18a8e54ce94f250fd166a272ac Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:16 +0200 Subject: x86: add copies of some headers to convert to asm-generic Just an intermediate step to make reviewing easier. These files are identical copies of the existing headers. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/generic-mman.h | 20 ++++++++ arch/x86/include/asm/generic-module.h | 80 ++++++++++++++++++++++++++++++ arch/x86/include/asm/generic-scatterlist.h | 33 ++++++++++++ arch/x86/include/asm/generic-types.h | 30 +++++++++++ arch/x86/include/asm/generic-ucontext.h | 18 +++++++ 5 files changed, 181 insertions(+) create mode 100644 arch/x86/include/asm/generic-mman.h create mode 100644 arch/x86/include/asm/generic-module.h create mode 100644 arch/x86/include/asm/generic-scatterlist.h create mode 100644 arch/x86/include/asm/generic-types.h create mode 100644 arch/x86/include/asm/generic-ucontext.h (limited to 'arch') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h new file mode 100644 index 00000000000..751af2550ed --- /dev/null +++ b/arch/x86/include/asm/generic-mman.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_MMAN_H +#define _ASM_X86_MMAN_H + +#include + +#define MAP_32BIT 0x40 /* only give out 32bit addresses */ + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h new file mode 100644 index 00000000000..47d62743c4d --- /dev/null +++ b/arch/x86/include/asm/generic-module.h @@ -0,0 +1,80 @@ +#ifndef _ASM_X86_MODULE_H +#define _ASM_X86_MODULE_H + +/* x86_32/64 are simple */ +struct mod_arch_specific {}; + +#ifdef CONFIG_X86_32 +# define Elf_Shdr Elf32_Shdr +# define Elf_Sym Elf32_Sym +# define Elf_Ehdr Elf32_Ehdr +#else +# define Elf_Shdr Elf64_Shdr +# define Elf_Sym Elf64_Sym +# define Elf_Ehdr Elf64_Ehdr +#endif + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M386 +#define MODULE_PROC_FAMILY "386 " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_X86_ELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined 
CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_4KSTACKS +# define MODULE_STACKSIZE "4KSTACKS " +# else +# define MODULE_STACKSIZE "" +# endif +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#endif + +#endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h new file mode 100644 index 00000000000..263d397d2ee --- /dev/null +++ b/arch/x86/include/asm/generic-scatterlist.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_SCATTERLIST_H +#define _ASM_X86_SCATTERLIST_H + +#include + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; +}; + +#define ARCH_HAS_SG_CHAIN +#define ISA_DMA_THRESHOLD (0x00ffffff) + +/* + * These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#ifdef CONFIG_X86_32 +# define sg_dma_len(sg) ((sg)->length) +#else +# define sg_dma_len(sg) ((sg)->dma_length) +#endif + +#endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h new file mode 100644 index 00000000000..09b97745772 --- /dev/null +++ b/arch/x86/include/asm/generic-types.h @@ -0,0 +1,30 @@ +#ifndef _ASM_X86_TYPES_H +#define _ASM_X86_TYPES_H + +#include + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +typedef u64 dma64_addr_t; +#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) +/* DMA addresses come in 32-bit and 64-bit flavours. */ +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h new file mode 100644 index 00000000000..87324cf439d --- /dev/null +++ b/arch/x86/include/asm/generic-ucontext.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_UCONTEXT_H +#define _ASM_X86_UCONTEXT_H + +#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state + * information in the memory layout pointed + * by the fpstate pointer in the ucontext's + * sigcontext struct (uc_mcontext). + */ + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.2.3 From 7bfd124d6dae7d394e73753300594a81a022fe7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:17 +0200 Subject: x86: convert trivial headers to asm-generic version For these nine header files, the asm-generic version should be semantically identical to what is in x86. Change the contents to be binary identical, for better review. 
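To show where the series is heading, here is a sketch of the end state for one header; the real hunks appear in the next patch, and the exact include path is an assumption of this sketch. Each x86 header keeps only its genuinely arch-specific definitions and falls back on the asm-generic copy for the rest:

	/* arch/x86/include/asm/mman.h, after conversion */
	#ifndef _ASM_X86_MMAN_H
	#define _ASM_X86_MMAN_H

	#define MAP_32BIT 0x40	/* only give out 32bit addresses */

	#include <asm-generic/mman.h>	/* all the shared MAP_ and MCL_ flags */

	#endif /* _ASM_X86_MMAN_H */
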
Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/ioctls.h | 40 ++++++++++++++++++++++++++++------------ arch/x86/include/asm/ipcbuf.h | 15 ++++++++++----- arch/x86/include/asm/msgbuf.h | 30 +++++++++++++++++++----------- arch/x86/include/asm/param.h | 8 +++++--- arch/x86/include/asm/shmbuf.h | 28 ++++++++++++++++++---------- arch/x86/include/asm/socket.h | 10 +++++----- arch/x86/include/asm/sockios.h | 6 +++--- arch/x86/include/asm/termbits.h | 8 ++++---- 8 files changed, 92 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h index 0d5b23b7b06..a799e20a769 100644 --- a/arch/x86/include/asm/ioctls.h +++ b/arch/x86/include/asm/ioctls.h @@ -1,12 +1,23 @@ -#ifndef _ASM_X86_IOCTLS_H -#define _ASM_X86_IOCTLS_H +#ifndef __ASM_GENERIC_IOCTLS_H +#define __ASM_GENERIC_IOCTLS_H -#include +#include + +/* + * These are the most common definitions for tty ioctl numbers. + * Most of them do not use the recommended _IOC(), but there is + * probably some source code out there hardcoding the number, + * so we might as well use them for all new platforms. + * + * The architectures that use different values here typically + * try to be compatible with some Unix variants for the same + * architecture. + */ /* 0x54 is just a magic number to make these relatively unique ('T') */ #define TCGETS 0x5401 -#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETS 0x5402 #define TCSETSW 0x5403 #define TCSETSF 0x5404 #define TCGETA 0x5405 @@ -43,7 +54,6 @@ #define TIOCSETD 0x5423 #define TIOCGETD 0x5424 #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ #define TIOCSBRK 0x5427 /* BSD compatibility */ #define TIOCCBRK 0x5428 /* BSD compatibility */ #define TIOCGSID 0x5429 /* Return the session ID of FD */ @@ -53,8 +63,7 @@ #define TCSETSF2 _IOW('T', 0x2D, struct termios2) #define TIOCGRS485 0x542E #define TIOCSRS485 0x542F -#define TIOCGPTN _IOR('T', 0x30, unsigned int) - /* Get Pty Number (of pty-mux device) */ +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ #define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ #define TCSETX 0x5433 @@ -76,9 +85,16 @@ #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ -#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ -#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ -#define FIOQSIZE 0x5460 + +/* + * some architectures define FIOQSIZE as 0x545E, which is used for + * TIOCGHAYESESP on others + */ +#ifndef FIOQSIZE +# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +# define FIOQSIZE 0x5460 +#endif /* Used for packet mode */ #define TIOCPKT_DATA 0 @@ -89,6 +105,6 @@ #define TIOCPKT_NOSTOP 16 #define TIOCPKT_DOSTOP 32 -#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#endif /* _ASM_X86_IOCTLS_H */ +#endif /* __ASM_GENERIC_IOCTLS_H */ diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h index ee678fd5159..888ca1c8f95 100644 --- a/arch/x86/include/asm/ipcbuf.h +++ b/arch/x86/include/asm/ipcbuf.h @@ -1,13 +1,18 @@ -#ifndef _ASM_X86_IPCBUF_H -#define _ASM_X86_IPCBUF_H 
+#ifndef __ASM_GENERIC_IPCBUF_H +#define __ASM_GENERIC_IPCBUF_H /* - * The ipc64_perm structure for x86 architecture. + * The generic ipc64_perm structure: * Note extra padding because this structure is passed back and forth * between kernel and user space. * + * ipc64_perm was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * * Pad space is left for: - * - 32-bit mode_t and seq + * - 32-bit mode_t on architectures that only had 16 bit + * - 32-bit seq * - 2 miscellaneous 32-bit values */ @@ -25,4 +30,4 @@ struct ipc64_perm { unsigned long __unused2; }; -#endif /* _ASM_X86_IPCBUF_H */ +#endif /* __ASM_GENERIC_IPCBUF_H */ diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h index 7e4e9481f51..aec850d9159 100644 --- a/arch/x86/include/asm/msgbuf.h +++ b/arch/x86/include/asm/msgbuf.h @@ -1,30 +1,38 @@ -#ifndef _ASM_X86_MSGBUF_H -#define _ASM_X86_MSGBUF_H +#ifndef __ASM_GENERIC_MSGBUF_H +#define __ASM_GENERIC_MSGBUF_H +#include /* - * The msqid64_ds structure for i386 architecture. + * generic msqid64_ds structure. + * * Note extra padding because this structure is passed back and forth * between kernel and user space. * - * Pad space on i386 is left for: + * msqid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first three padding words. + * On big-endian systems, the padding is in the wrong place. + * + * Pad space is left for: * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values - * - * Pad space on x8664 is left for: - * - 2 miscellaneous 64-bit values */ + struct msqid64_ds { struct ipc64_perm msg_perm; __kernel_time_t msg_stime; /* last msgsnd time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused1; #endif __kernel_time_t msg_rtime; /* last msgrcv time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused2; #endif __kernel_time_t msg_ctime; /* last change time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused3; #endif unsigned long msg_cbytes; /* current number of bytes on queue */ @@ -36,4 +44,4 @@ struct msqid64_ds { unsigned long __unused5; }; -#endif /* _ASM_X86_MSGBUF_H */ +#endif /* __ASM_GENERIC_MSGBUF_H */ diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h index 6f0d0422f4c..cdf8251bfb6 100644 --- a/arch/x86/include/asm/param.h +++ b/arch/x86/include/asm/param.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARAM_H -#define _ASM_X86_PARAM_H +#ifndef __ASM_GENERIC_PARAM_H +#define __ASM_GENERIC_PARAM_H #ifdef __KERNEL__ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ @@ -11,7 +11,9 @@ #define HZ 100 #endif +#ifndef EXEC_PAGESIZE #define EXEC_PAGESIZE 4096 +#endif #ifndef NOGROUP #define NOGROUP (-1) @@ -19,4 +21,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#endif /* _ASM_X86_PARAM_H */ +#endif /* __ASM_GENERIC_PARAM_H */ diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h index b51413b7497..5768fa60ac8 100644 --- a/arch/x86/include/asm/shmbuf.h +++ b/arch/x86/include/asm/shmbuf.h @@ -1,32 +1,40 @@ -#ifndef _ASM_X86_SHMBUF_H -#define _ASM_X86_SHMBUF_H +#ifndef __ASM_GENERIC_SHMBUF_H +#define __ASM_GENERIC_SHMBUF_H + +#include /* * The shmid64_ds 
structure for x86 architecture. * Note extra padding because this structure is passed back and forth * between kernel and user space. * - * Pad space on 32 bit is left for: + * shmid64_ds was originally meant to be architecture specific, but + * everyone just ended up making identical copies without specific + * optimizations, so we may just as well all use the same one. + * + * 64 bit architectures typically define a 64 bit __kernel_time_t, + * so they do not need the first two padding words. + * On big-endian systems, the padding is in the wrong place. + * + * + * Pad space is left for: * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values - * - * Pad space on 64 bit is left for: - * - 2 miscellaneous 64-bit values */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ __kernel_time_t shm_atime; /* last attach time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused1; #endif __kernel_time_t shm_dtime; /* last detach time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused2; #endif __kernel_time_t shm_ctime; /* last change time */ -#ifdef __i386__ +#if __BITS_PER_LONG != 64 unsigned long __unused3; #endif __kernel_pid_t shm_cpid; /* pid of creator */ @@ -48,4 +56,4 @@ struct shminfo64 { unsigned long __unused4; }; -#endif /* _ASM_X86_SHMBUF_H */ +#endif /* __ASM_GENERIC_SHMBUF_H */ diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index ca8bf2cd0ba..d4ae42a06a2 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKET_H -#define _ASM_X86_SOCKET_H +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H #include @@ -38,8 +38,8 @@ #define SO_BINDTODEVICE 25 /* Socket filtering */ -#define SO_ATTACH_FILTER 26 -#define SO_DETACH_FILTER 27 +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 @@ -57,4 +57,4 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING -#endif /* _ASM_X86_SOCKET_H */ +#endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h index 49cc72b5d3c..9a61a369b90 100644 --- a/arch/x86/include/asm/sockios.h +++ b/arch/x86/include/asm/sockios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKIOS_H -#define _ASM_X86_SOCKIOS_H +#ifndef __ASM_GENERIC_SOCKIOS_H +#define __ASM_GENERIC_SOCKIOS_H /* Socket-level I/O control calls. 
*/ #define FIOSETOWN 0x8901 @@ -10,4 +10,4 @@ #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ -#endif /* _ASM_X86_SOCKIOS_H */ +#endif /* __ASM_GENERIC_SOCKIOS_H */ diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h index af1b70ea440..1c9773d48cb 100644 --- a/arch/x86/include/asm/termbits.h +++ b/arch/x86/include/asm/termbits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMBITS_H -#define _ASM_X86_TERMBITS_H +#ifndef __ASM_GENERIC_TERMBITS_H +#define __ASM_GENERIC_TERMBITS_H #include @@ -140,7 +140,7 @@ struct ktermios { #define HUPCL 0002000 #define CLOCAL 0004000 #define CBAUDEX 0010000 -#define BOTHER 0010000 /* non standard rate */ +#define BOTHER 0010000 #define B57600 0010001 #define B115200 0010002 #define B230400 0010003 @@ -195,4 +195,4 @@ struct ktermios { #define TCSADRAIN 1 #define TCSAFLUSH 2 -#endif /* _ASM_X86_TERMBITS_H */ +#endif /* __ASM_GENERIC_TERMBITS_H */ -- cgit v1.2.3 From 06f5013aa8eb5895ced2c71d13f5114103605555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:18 +0200 Subject: x86: convert almost generic headers to asm-generic version In x86, mman.h, module.h, scatterlist.h, types.h and ucontext.h can use the asm-generic version by just defining the x86 specific parts locally and falling back on the generic code for the common bits. This patch illustrates the differences between the x86 and asm-generic versions by changing a file that is initially identical to the x86 version to one that is identical to the asm-generic version. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/generic-mman.h | 8 +-- arch/x86/include/asm/generic-module.h | 92 ++++++------------------------ arch/x86/include/asm/generic-scatterlist.h | 34 +++++++---- arch/x86/include/asm/generic-types.h | 32 +++++++---- arch/x86/include/asm/generic-ucontext.h | 12 +--- arch/x86/include/asm/mman.h | 14 +---- arch/x86/include/asm/module.h | 13 +---- arch/x86/include/asm/scatterlist.h | 27 +-------- arch/x86/include/asm/types.h | 12 +--- arch/x86/include/asm/ucontext.h | 8 +-- 10 files changed, 73 insertions(+), 179 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h index 751af2550ed..7cab4de2bca 100644 --- a/arch/x86/include/asm/generic-mman.h +++ b/arch/x86/include/asm/generic-mman.h @@ -1,10 +1,8 @@ -#ifndef _ASM_X86_MMAN_H -#define _ASM_X86_MMAN_H +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H #include -#define MAP_32BIT 0x40 /* only give out 32bit addresses */ - #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ @@ -17,4 +15,4 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#endif /* _ASM_X86_MMAN_H */ +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h index 47d62743c4d..ed5b44de4c9 100644 --- a/arch/x86/include/asm/generic-module.h +++ b/arch/x86/include/asm/generic-module.h @@ -1,80 +1,22 @@ -#ifndef _ASM_X86_MODULE_H -#define _ASM_X86_MODULE_H +#ifndef __ASM_GENERIC_MODULE_H +#define __ASM_GENERIC_MODULE_H -/* x86_32/64 are simple */ -struct mod_arch_specific {}; +/* + * Many architectures just need a simple module + * loader without arch specific data. 
+ */ +struct mod_arch_specific +{ +}; -#ifdef CONFIG_X86_32 -# define Elf_Shdr Elf32_Shdr -# define Elf_Sym Elf32_Sym -# define Elf_Ehdr Elf32_Ehdr +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr #else -# define Elf_Shdr Elf64_Shdr -# define Elf_Sym Elf64_Sym -# define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr #endif -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M386 -#define MODULE_PROC_FAMILY "386 " -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_X86_ELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# ifdef CONFIG_4KSTACKS -# define MODULE_STACKSIZE "4KSTACKS " -# else -# define MODULE_STACKSIZE "" -# endif -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE -#endif - -#endif /* _ASM_X86_MODULE_H */ +#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h index 263d397d2ee..8b9454496a7 100644 --- a/arch/x86/include/asm/generic-scatterlist.h +++ b/arch/x86/include/asm/generic-scatterlist.h @@ -1,7 +1,7 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H +#ifndef __ASM_GENERIC_SCATTERLIST_H +#define __ASM_GENERIC_SCATTERLIST_H -#include +#include struct scatterlist { #ifdef CONFIG_DEBUG_SG @@ -14,20 +14,30 @@ struct scatterlist { unsigned int dma_length; }; -#define ARCH_HAS_SG_CHAIN -#define ISA_DMA_THRESHOLD (0x00ffffff) - /* - * These macros should be used after a pci_map_sg call has been done + * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns. + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. 
*/ #define sg_dma_address(sg) ((sg)->dma_address) -#ifdef CONFIG_X86_32 -# define sg_dma_len(sg) ((sg)->length) +#ifndef sg_dma_len +/* + * Normally, you have an iommu on 64 bit machines, but not on 32 bit + * machines. Architectures that are differnt should override this. + */ +#if __BITS_PER_LONG == 64 +#define sg_dma_len(sg) ((sg)->dma_length) #else -# define sg_dma_len(sg) ((sg)->dma_length) +#define sg_dma_len(sg) ((sg)->length) +#endif /* 64 bit */ +#endif /* sg_dma_len */ + +#ifndef ISA_DMA_THRESHOLD +#define ISA_DMA_THRESHOLD (~0UL) #endif -#endif /* _ASM_X86_SCATTERLIST_H */ +#define ARCH_HAS_SG_CHAIN + +#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h index 09b97745772..fba7d33ca3f 100644 --- a/arch/x86/include/asm/generic-types.h +++ b/arch/x86/include/asm/generic-types.h @@ -1,6 +1,9 @@ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H - +#ifndef _ASM_GENERIC_TYPES_H +#define _ASM_GENERIC_TYPES_H +/* + * int-ll64 is used practically everywhere now, + * so use it as a reasonable default. + */ #include #ifndef __ASSEMBLY__ @@ -13,18 +16,27 @@ typedef unsigned short umode_t; * These aren't exported outside the kernel to avoid name space clashes */ #ifdef __KERNEL__ - #ifndef __ASSEMBLY__ - -typedef u64 dma64_addr_t; -#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) -/* DMA addresses come in 32-bit and 64-bit flavours. */ +/* + * DMA addresses may be very different from physical addresses + * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit + * systems, while sparc64 uses 32 bit DMA addresses for 64 bit + * physical addresses. + * This default defines dma_addr_t to have the same size as + * phys_addr_t, which is the most common way. + * Do not define the dma64_addr_t type, which never really + * worked. + */ +#ifndef dma_addr_t +#ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif +#endif /* CONFIG_PHYS_ADDR_T_64BIT */ +#endif /* dma_addr_t */ #endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TYPES_H */ +#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h index 87324cf439d..ad77343e8a9 100644 --- a/arch/x86/include/asm/generic-ucontext.h +++ b/arch/x86/include/asm/generic-ucontext.h @@ -1,11 +1,5 @@ -#ifndef _ASM_X86_UCONTEXT_H -#define _ASM_X86_UCONTEXT_H - -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). 
- */ +#ifndef __ASM_GENERIC_UCONTEXT_H +#define __ASM_GENERIC_UCONTEXT_H struct ucontext { unsigned long uc_flags; @@ -15,4 +9,4 @@ struct ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#endif /* _ASM_X86_UCONTEXT_H */ +#endif /* __ASM_GENERIC_UCONTEXT_H */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 751af2550ed..063d8c9e4d6 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -1,20 +1,8 @@ #ifndef _ASM_X86_MMAN_H #define _ASM_X86_MMAN_H -#include - #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ -#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ -#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MAP_LOCKED 0x2000 /* pages are locked */ -#define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ - -#define MCL_CURRENT 1 /* lock all current mappings */ -#define MCL_FUTURE 2 /* lock all future mappings */ +#include #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 47d62743c4d..4a7a192910d 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -1,18 +1,7 @@ #ifndef _ASM_X86_MODULE_H #define _ASM_X86_MODULE_H -/* x86_32/64 are simple */ -struct mod_arch_specific {}; - -#ifdef CONFIG_X86_32 -# define Elf_Shdr Elf32_Shdr -# define Elf_Sym Elf32_Sym -# define Elf_Ehdr Elf32_Ehdr -#else -# define Elf_Shdr Elf64_Shdr -# define Elf_Sym Elf64_Sym -# define Elf_Ehdr Elf64_Ehdr -#endif +#include #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 263d397d2ee..2097d686471 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -1,33 +1,8 @@ #ifndef _ASM_X86_SCATTERLIST_H #define _ASM_X86_SCATTERLIST_H -#include - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; - unsigned int dma_length; -}; - -#define ARCH_HAS_SG_CHAIN #define ISA_DMA_THRESHOLD (0x00ffffff) -/* - * These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. 
- */ -#define sg_dma_address(sg) ((sg)->dma_address) -#ifdef CONFIG_X86_32 -# define sg_dma_len(sg) ((sg)->length) -#else -# define sg_dma_len(sg) ((sg)->dma_length) -#endif +#include #endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index 09b97745772..f2fe528e968 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h @@ -1,19 +1,11 @@ #ifndef _ASM_X86_TYPES_H #define _ASM_X86_TYPES_H -#include +#define dma_addr_t dma_addr_t -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; +#include -#endif /* __ASSEMBLY__ */ - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ #ifdef __KERNEL__ - #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h index 87324cf439d..7cfc436f86d 100644 --- a/arch/x86/include/asm/ucontext.h +++ b/arch/x86/include/asm/ucontext.h @@ -7,12 +7,6 @@ * sigcontext struct (uc_mcontext). */ -struct ucontext { - unsigned long uc_flags; - struct ucontext *uc_link; - stack_t uc_stack; - struct sigcontext uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; +#include #endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.2.3 From 69d5ffdaad7b77b97229b55c36afb20e5bebd29e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:19 +0200 Subject: x86: convert termios.h to the asm-generic version This patch turned out more controversial than expected and may get dropped in the future. I'm including it for reference anyway. The user_termio_to_kernel_termios and kernel_termios_to_user_termio functions on x86 are lacking error checking from get_user and are not portable to big-endian systems, so the asm-generic header has to differ in this regard. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/termios.h | 86 +++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index c4ee8056bac..d0922adc56d 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -1,5 +1,12 @@ -#ifndef _ASM_X86_TERMIOS_H -#define _ASM_X86_TERMIOS_H +#ifndef _ASM_GENERIC_TERMIOS_H +#define _ASM_GENERIC_TERMIOS_H +/* + * Most architectures have straight copies of the x86 code, with + * varying levels of bug fixes on top. Usually it's a good idea + * to use this generic version instead, but be careful to avoid + * ABI changes. + * New architectures should not provide their own version. + */ #include #include @@ -54,37 +61,57 @@ struct termio { /* * Translate a "termio" structure into a "termios". Ugh. 
*/ -#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ - unsigned short __tmp; \ - get_user(__tmp,&(termio)->x); \ - *(unsigned short *) &(termios)->x = __tmp; \ -} - static inline int user_termio_to_kernel_termios(struct ktermios *termios, - struct termio __user *termio) + const struct termio __user *termio) { - SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); - SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); - get_user(termios->c_line, &termio->c_line); - return copy_from_user(termios->c_cc, termio->c_cc, NCC); + unsigned short tmp; + + if (get_user(tmp, &termio->c_iflag) < 0) + goto fault; + termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; + + if (get_user(tmp, &termio->c_oflag) < 0) + goto fault; + termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; + + if (get_user(tmp, &termio->c_cflag) < 0) + goto fault; + termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; + + if (get_user(tmp, &termio->c_lflag) < 0) + goto fault; + termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; + + if (get_user(termios->c_line, &termio->c_line) < 0) + goto fault; + + if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) + goto fault; + + return 0; + + fault: + return -EFAULT; } /* * Translate a "termios" structure into a "termio". Ugh. */ static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) + struct ktermios *termios) { - put_user((termios)->c_iflag, &(termio)->c_iflag); - put_user((termios)->c_oflag, &(termio)->c_oflag); - put_user((termios)->c_cflag, &(termio)->c_cflag); - put_user((termios)->c_lflag, &(termio)->c_lflag); - put_user((termios)->c_line, &(termio)->c_line); - return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); + if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || + put_user(termios->c_oflag, &termio->c_oflag) < 0 || + put_user(termios->c_cflag, &termio->c_cflag) < 0 || + put_user(termios->c_lflag, &termio->c_lflag) < 0 || + put_user(termios->c_line, &termio->c_line) < 0 || + copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) + return -EFAULT; + + return 0; } +#ifdef TCGETS2 static inline int user_termios_to_kernel_termios(struct ktermios *k, struct termios2 __user *u) { @@ -108,7 +135,20 @@ static inline int kernel_termios_to_user_termios_1(struct termios __user *u, { return copy_to_user(u, k, sizeof(struct termios)); } +#else /* TCGETS2 */ +static inline int user_termios_to_kernel_termios(struct ktermios *k, + struct termios __user *u) +{ + return copy_from_user(k, u, sizeof(struct termios)); +} + +static inline int kernel_termios_to_user_termios(struct termios __user *u, + struct ktermios *k) +{ + return copy_to_user(u, k, sizeof(struct termios)); +} +#endif /* TCGETS2 */ #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TERMIOS_H */ +#endif /* _ASM_GENERIC_TERMIOS_H */ -- cgit v1.2.3 From 73a2d096fdf23aa841f7595d114a11ec85a85e4d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jun 2009 21:48:20 +0200 Subject: x86: remove all now-duplicate header files All files that have been made identical to the asm-generic version in the previous patches can now be removed, guaranteeing that this does not introduce semantic changes. Signed-off-by: Arnd Bergmann LKML-Reference: Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/generic-mman.h | 18 --- arch/x86/include/asm/generic-module.h | 22 ---- arch/x86/include/asm/generic-scatterlist.h | 43 ------- arch/x86/include/asm/generic-types.h | 42 ------ arch/x86/include/asm/generic-ucontext.h | 12 -- arch/x86/include/asm/ioctls.h | 111 +--------------- arch/x86/include/asm/ipcbuf.h | 34 +---- arch/x86/include/asm/mman.h | 2 +- arch/x86/include/asm/module.h | 2 +- arch/x86/include/asm/msgbuf.h | 48 +------ arch/x86/include/asm/param.h | 25 +--- arch/x86/include/asm/scatterlist.h | 2 +- arch/x86/include/asm/shmbuf.h | 60 +-------- arch/x86/include/asm/socket.h | 61 +-------- arch/x86/include/asm/sockios.h | 14 +- arch/x86/include/asm/termbits.h | 199 +---------------------------- arch/x86/include/asm/termios.h | 155 +--------------------- arch/x86/include/asm/types.h | 2 +- arch/x86/include/asm/ucontext.h | 2 +- 19 files changed, 14 insertions(+), 840 deletions(-) delete mode 100644 arch/x86/include/asm/generic-mman.h delete mode 100644 arch/x86/include/asm/generic-module.h delete mode 100644 arch/x86/include/asm/generic-scatterlist.h delete mode 100644 arch/x86/include/asm/generic-types.h delete mode 100644 arch/x86/include/asm/generic-ucontext.h (limited to 'arch') diff --git a/arch/x86/include/asm/generic-mman.h b/arch/x86/include/asm/generic-mman.h deleted file mode 100644 index 7cab4de2bca..00000000000 --- a/arch/x86/include/asm/generic-mman.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __ASM_GENERIC_MMAN_H -#define __ASM_GENERIC_MMAN_H - -#include - -#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ -#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ -#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MAP_LOCKED 0x2000 /* pages are locked */ -#define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ - -#define MCL_CURRENT 1 /* lock all current mappings */ -#define MCL_FUTURE 2 /* lock all future mappings */ - -#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/arch/x86/include/asm/generic-module.h b/arch/x86/include/asm/generic-module.h deleted file mode 100644 index ed5b44de4c9..00000000000 --- a/arch/x86/include/asm/generic-module.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_GENERIC_MODULE_H -#define __ASM_GENERIC_MODULE_H - -/* - * Many architectures just need a simple module - * loader without arch specific data. - */ -struct mod_arch_specific -{ -}; - -#ifdef CONFIG_64BIT -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Ehdr Elf64_Ehdr -#else -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Ehdr Elf32_Ehdr -#endif - -#endif /* __ASM_GENERIC_MODULE_H */ diff --git a/arch/x86/include/asm/generic-scatterlist.h b/arch/x86/include/asm/generic-scatterlist.h deleted file mode 100644 index 8b9454496a7..00000000000 --- a/arch/x86/include/asm/generic-scatterlist.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __ASM_GENERIC_SCATTERLIST_H -#define __ASM_GENERIC_SCATTERLIST_H - -#include - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; - unsigned int dma_length; -}; - -/* - * These macros should be used after a dma_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. 
- * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#ifndef sg_dma_len -/* - * Normally, you have an iommu on 64 bit machines, but not on 32 bit - * machines. Architectures that are differnt should override this. - */ -#if __BITS_PER_LONG == 64 -#define sg_dma_len(sg) ((sg)->dma_length) -#else -#define sg_dma_len(sg) ((sg)->length) -#endif /* 64 bit */ -#endif /* sg_dma_len */ - -#ifndef ISA_DMA_THRESHOLD -#define ISA_DMA_THRESHOLD (~0UL) -#endif - -#define ARCH_HAS_SG_CHAIN - -#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/generic-types.h b/arch/x86/include/asm/generic-types.h deleted file mode 100644 index fba7d33ca3f..00000000000 --- a/arch/x86/include/asm/generic-types.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _ASM_GENERIC_TYPES_H -#define _ASM_GENERIC_TYPES_H -/* - * int-ll64 is used practically everywhere now, - * so use it as a reasonable default. - */ -#include - -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -/* - * DMA addresses may be very different from physical addresses - * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit - * systems, while sparc64 uses 32 bit DMA addresses for 64 bit - * physical addresses. - * This default defines dma_addr_t to have the same size as - * phys_addr_t, which is the most common way. - * Do not define the dma64_addr_t type, which never really - * worked. - */ -#ifndef dma_addr_t -#ifdef CONFIG_PHYS_ADDR_T_64BIT -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif /* CONFIG_PHYS_ADDR_T_64BIT */ -#endif /* dma_addr_t */ - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_GENERIC_TYPES_H */ diff --git a/arch/x86/include/asm/generic-ucontext.h b/arch/x86/include/asm/generic-ucontext.h deleted file mode 100644 index ad77343e8a9..00000000000 --- a/arch/x86/include/asm/generic-ucontext.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __ASM_GENERIC_UCONTEXT_H -#define __ASM_GENERIC_UCONTEXT_H - -struct ucontext { - unsigned long uc_flags; - struct ucontext *uc_link; - stack_t uc_stack; - struct sigcontext uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -#endif /* __ASM_GENERIC_UCONTEXT_H */ diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h index a799e20a769..ec34c760665 100644 --- a/arch/x86/include/asm/ioctls.h +++ b/arch/x86/include/asm/ioctls.h @@ -1,110 +1 @@ -#ifndef __ASM_GENERIC_IOCTLS_H -#define __ASM_GENERIC_IOCTLS_H - -#include - -/* - * These are the most common definitions for tty ioctl numbers. - * Most of them do not use the recommended _IOC(), but there is - * probably some source code out there hardcoding the number, - * so we might as well use them for all new platforms. - * - * The architectures that use different values here typically - * try to be compatible with some Unix variants for the same - * architecture. 
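(The "recommended _IOC()" encoding mentioned above is what the newer TCGETS2..TCSETSF2 entries below use. A runnable user-space toy showing what _IOR()/_IOW() pack into the number; my_payload is a made-up struct, and only its sizeof() enters the encoding.)

#include <stdio.h>
#include <linux/ioctl.h>

struct my_payload {
        unsigned int flags;     /* made up; only the size matters */
};

#define MY_GET  _IOR('T', 0x2A, struct my_payload)
#define MY_SET  _IOW('T', 0x2B, struct my_payload)

int main(void)
{
        /* direction | size | magic 'T' | number, in one 32-bit word */
        printf("MY_GET = %#x\n", (unsigned int)MY_GET);
        printf("MY_SET = %#x\n", (unsigned int)MY_SET);
        return 0;
}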
- */ - -/* 0x54 is just a magic number to make these relatively unique ('T') */ - -#define TCGETS 0x5401 -#define TCSETS 0x5402 -#define TCSETSW 0x5403 -#define TCSETSF 0x5404 -#define TCGETA 0x5405 -#define TCSETA 0x5406 -#define TCSETAW 0x5407 -#define TCSETAF 0x5408 -#define TCSBRK 0x5409 -#define TCXONC 0x540A -#define TCFLSH 0x540B -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E -#define TIOCGPGRP 0x540F -#define TIOCSPGRP 0x5410 -#define TIOCOUTQ 0x5411 -#define TIOCSTI 0x5412 -#define TIOCGWINSZ 0x5413 -#define TIOCSWINSZ 0x5414 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A -#define FIONREAD 0x541B -#define TIOCINQ FIONREAD -#define TIOCLINUX 0x541C -#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -#define FIONBIO 0x5421 -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TCGETS2 _IOR('T', 0x2A, struct termios2) -#define TCSETS2 _IOW('T', 0x2B, struct termios2) -#define TCSETSW2 _IOW('T', 0x2C, struct termios2) -#define TCSETSF2 _IOW('T', 0x2D, struct termios2) -#define TIOCGRS485 0x542E -#define TIOCSRS485 0x542F -#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ -#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ -#define TCSETX 0x5433 -#define TCSETXF 0x5434 -#define TCSETXW 0x5435 - -#define FIONCLEX 0x5450 -#define FIOCLEX 0x5451 -#define FIOASYNC 0x5452 -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ -#define TIOCSERGETMULTI 0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ - -/* - * some architectures define FIOQSIZE as 0x545E, which is used for - * TIOCGHAYESESP on others - */ -#ifndef FIOQSIZE -# define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ -# define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ -# define FIOQSIZE 0x5460 -#endif - -/* Used for packet mode */ -#define TIOCPKT_DATA 0 -#define TIOCPKT_FLUSHREAD 1 -#define TIOCPKT_FLUSHWRITE 2 -#define TIOCPKT_STOP 4 -#define TIOCPKT_START 8 -#define TIOCPKT_NOSTOP 16 -#define TIOCPKT_DOSTOP 32 - -#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ - -#endif /* __ASM_GENERIC_IOCTLS_H */ +#include diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h index 888ca1c8f95..84c7e51cb6d 100644 --- a/arch/x86/include/asm/ipcbuf.h +++ b/arch/x86/include/asm/ipcbuf.h @@ -1,33 +1 @@ -#ifndef __ASM_GENERIC_IPCBUF_H -#define __ASM_GENERIC_IPCBUF_H - -/* - * The generic ipc64_perm structure: - * Note extra padding because this structure is passed back and forth - * between kernel and user space. 
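(Looking back at the 'T' ioctl list above: most of those numbers are consumed from user space via plain ioctl(2). A small runnable example using TIOCGWINSZ; the winsize layout it fills appears in the termios.h removal further down.)

#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        struct winsize ws;

        if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) < 0) {
                perror("TIOCGWINSZ");
                return 1;
        }
        printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);
        return 0;
}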
- * - * ipc64_perm was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * Pad space is left for: - * - 32-bit mode_t on architectures that only had 16 bit - * - 32-bit seq - * - 2 miscellaneous 32-bit values - */ - -struct ipc64_perm { - __kernel_key_t key; - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_uid32_t cuid; - __kernel_gid32_t cgid; - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned short __pad2; - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* __ASM_GENERIC_IPCBUF_H */ +#include diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 063d8c9e4d6..593e51d4643 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -3,6 +3,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#include +#include #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 4a7a192910d..555bc12bdcd 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_MODULE_H #define _ASM_X86_MODULE_H -#include +#include #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h index aec850d9159..809134c644a 100644 --- a/arch/x86/include/asm/msgbuf.h +++ b/arch/x86/include/asm/msgbuf.h @@ -1,47 +1 @@ -#ifndef __ASM_GENERIC_MSGBUF_H -#define __ASM_GENERIC_MSGBUF_H - -#include -/* - * generic msqid64_ds structure. - * - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * msqid64_ds was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * 64 bit architectures typically define a 64 bit __kernel_time_t, - * so they do not need the first three padding words. - * On big-endian systems, the padding is in the wrong place. 
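(The msqid64_ds being discussed here is what a msgctl(IPC_STAT) call ultimately fills in; user space sees it through the libc msqid_ds view. A quick runnable sketch:)

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/msg.h>

int main(void)
{
        struct msqid_ds info;
        int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);

        if (id < 0) {
                perror("msgget");
                return 1;
        }
        if (msgctl(id, IPC_STAT, &info) == 0)
                printf("qbytes=%lu lspid=%d\n",
                       (unsigned long)info.msg_qbytes, (int)info.msg_lspid);
        return msgctl(id, IPC_RMID, NULL) < 0;
}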
- * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused1; -#endif - __kernel_time_t msg_rtime; /* last msgrcv time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused2; -#endif - __kernel_time_t msg_ctime; /* last change time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused3; -#endif - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused4; - unsigned long __unused5; -}; - -#endif /* __ASM_GENERIC_MSGBUF_H */ +#include diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h index cdf8251bfb6..965d4542797 100644 --- a/arch/x86/include/asm/param.h +++ b/arch/x86/include/asm/param.h @@ -1,24 +1 @@ -#ifndef __ASM_GENERIC_PARAM_H -#define __ASM_GENERIC_PARAM_H - -#ifdef __KERNEL__ -# define HZ CONFIG_HZ /* Internal kernel timer frequency */ -# define USER_HZ 100 /* some user interfaces are */ -# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ -#endif - -#ifndef HZ -#define HZ 100 -#endif - -#ifndef EXEC_PAGESIZE -#define EXEC_PAGESIZE 4096 -#endif - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#endif /* __ASM_GENERIC_PARAM_H */ +#include diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 2097d686471..75af592677e 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -3,6 +3,6 @@ #define ISA_DMA_THRESHOLD (0x00ffffff) -#include +#include #endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h index 5768fa60ac8..83c05fc2de3 100644 --- a/arch/x86/include/asm/shmbuf.h +++ b/arch/x86/include/asm/shmbuf.h @@ -1,59 +1 @@ -#ifndef __ASM_GENERIC_SHMBUF_H -#define __ASM_GENERIC_SHMBUF_H - -#include - -/* - * The shmid64_ds structure for x86 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * shmid64_ds was originally meant to be architecture specific, but - * everyone just ended up making identical copies without specific - * optimizations, so we may just as well all use the same one. - * - * 64 bit architectures typically define a 64 bit __kernel_time_t, - * so they do not need the first two padding words. - * On big-endian systems, the padding is in the wrong place. - * - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused1; -#endif - __kernel_time_t shm_dtime; /* last detach time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused2; -#endif - __kernel_time_t shm_ctime; /* last change time */ -#if __BITS_PER_LONG != 64 - unsigned long __unused3; -#endif - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. 
of current attaches */ - unsigned long __unused4; - unsigned long __unused5; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* __ASM_GENERIC_SHMBUF_H */ +#include diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index d4ae42a06a2..6b71384b9d8 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -1,60 +1 @@ -#ifndef __ASM_GENERIC_SOCKET_H -#define __ASM_GENERIC_SOCKET_H - -#include - -/* For setsockopt(2) */ -#define SOL_SOCKET 1 - -#define SO_DEBUG 1 -#define SO_REUSEADDR 2 -#define SO_TYPE 3 -#define SO_ERROR 4 -#define SO_DONTROUTE 5 -#define SO_BROADCAST 6 -#define SO_SNDBUF 7 -#define SO_RCVBUF 8 -#define SO_SNDBUFFORCE 32 -#define SO_RCVBUFFORCE 33 -#define SO_KEEPALIVE 9 -#define SO_OOBINLINE 10 -#define SO_NO_CHECK 11 -#define SO_PRIORITY 12 -#define SO_LINGER 13 -#define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ -#define SO_PASSCRED 16 -#define SO_PEERCRED 17 -#define SO_RCVLOWAT 18 -#define SO_SNDLOWAT 19 -#define SO_RCVTIMEO 20 -#define SO_SNDTIMEO 21 - -/* Security levels - as per NRL IPv6 - don't actually do anything */ -#define SO_SECURITY_AUTHENTICATION 22 -#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 -#define SO_SECURITY_ENCRYPTION_NETWORK 24 - -#define SO_BINDTODEVICE 25 - -/* Socket filtering */ -#define SO_ATTACH_FILTER 26 -#define SO_DETACH_FILTER 27 - -#define SO_PEERNAME 28 -#define SO_TIMESTAMP 29 -#define SCM_TIMESTAMP SO_TIMESTAMP - -#define SO_ACCEPTCONN 30 - -#define SO_PEERSEC 31 -#define SO_PASSSEC 34 -#define SO_TIMESTAMPNS 35 -#define SCM_TIMESTAMPNS SO_TIMESTAMPNS - -#define SO_MARK 36 - -#define SO_TIMESTAMPING 37 -#define SCM_TIMESTAMPING SO_TIMESTAMPING - -#endif /* __ASM_GENERIC_SOCKET_H */ +#include diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h index 9a61a369b90..def6d4746ee 100644 --- a/arch/x86/include/asm/sockios.h +++ b/arch/x86/include/asm/sockios.h @@ -1,13 +1 @@ -#ifndef __ASM_GENERIC_SOCKIOS_H -#define __ASM_GENERIC_SOCKIOS_H - -/* Socket-level I/O control calls. 
*/ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif /* __ASM_GENERIC_SOCKIOS_H */ +#include diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h index 1c9773d48cb..3935b106de7 100644 --- a/arch/x86/include/asm/termbits.h +++ b/arch/x86/include/asm/termbits.h @@ -1,198 +1 @@ -#ifndef __ASM_GENERIC_TERMBITS_H -#define __ASM_GENERIC_TERMBITS_H - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ -#define IGNBRK 0000001 -#define BRKINT 0000002 -#define IGNPAR 0000004 -#define PARMRK 0000010 -#define INPCK 0000020 -#define ISTRIP 0000040 -#define INLCR 0000100 -#define IGNCR 0000200 -#define ICRNL 0000400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 0010000 -#define IMAXBEL 0020000 -#define IUTF8 0040000 - -/* c_oflag bits */ -#define OPOST 0000001 -#define OLCUC 0000002 -#define ONLCR 0000004 -#define OCRNL 0000010 -#define ONOCR 0000020 -#define ONLRET 0000040 -#define OFILL 0000100 -#define OFDEL 0000200 -#define NLDLY 0000400 -#define NL0 0000000 -#define NL1 0000400 -#define CRDLY 0003000 -#define CR0 0000000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 0000000 -#define TAB1 0004000 -#define TAB2 0010000 -#define TAB3 0014000 -#define XTABS 0014000 -#define BSDLY 0020000 -#define BS0 0000000 -#define BS1 0020000 -#define VTDLY 0040000 -#define VT0 0000000 -#define VT1 0040000 -#define FFDLY 0100000 -#define FF0 0000000 -#define FF1 0100000 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0 0000000 /* hang up */ -#define B50 0000001 -#define B75 0000002 -#define B110 0000003 -#define B134 0000004 -#define B150 0000005 -#define B200 0000006 -#define B300 0000007 -#define B600 0000010 -#define B1200 0000011 -#define B1800 0000012 -#define B2400 0000013 -#define B4800 0000014 -#define B9600 0000015 -#define B19200 0000016 -#define B38400 0000017 -#define EXTA B19200 -#define 
EXTB B38400 -#define CSIZE 0000060 -#define CS5 0000000 -#define CS6 0000020 -#define CS7 0000040 -#define CS8 0000060 -#define CSTOPB 0000100 -#define CREAD 0000200 -#define PARENB 0000400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 0010000 -#define BOTHER 0010000 -#define B57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B500000 0010005 -#define B576000 0010006 -#define B921600 0010007 -#define B1000000 0010010 -#define B1152000 0010011 -#define B1500000 0010012 -#define B2000000 0010013 -#define B2500000 0010014 -#define B3000000 0010015 -#define B3500000 0010016 -#define B4000000 0010017 -#define CIBAUD 002003600000 /* input baud rate */ -#define CMSPAR 010000000000 /* mark or space (stick) parity */ -#define CRTSCTS 020000000000 /* flow control */ - -#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ - -/* c_lflag bits */ -#define ISIG 0000001 -#define ICANON 0000002 -#define XCASE 0000004 -#define ECHO 0000010 -#define ECHOE 0000020 -#define ECHOK 0000040 -#define ECHONL 0000100 -#define NOFLSH 0000200 -#define TOSTOP 0000400 -#define ECHOCTL 0001000 -#define ECHOPRT 0002000 -#define ECHOKE 0004000 -#define FLUSHO 0010000 -#define PENDIN 0040000 -#define IEXTEN 0100000 - -/* tcflow() and TCXONC use these */ -#define TCOOFF 0 -#define TCOON 1 -#define TCIOFF 2 -#define TCION 3 - -/* tcflush() and TCFLSH use these */ -#define TCIFLUSH 0 -#define TCOFLUSH 1 -#define TCIOFLUSH 2 - -/* tcsetattr uses these */ -#define TCSANOW 0 -#define TCSADRAIN 1 -#define TCSAFLUSH 2 - -#endif /* __ASM_GENERIC_TERMBITS_H */ +#include diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index d0922adc56d..280d78a9d96 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -1,154 +1 @@ -#ifndef _ASM_GENERIC_TERMIOS_H -#define _ASM_GENERIC_TERMIOS_H -/* - * Most architectures have straight copies of the x86 code, with - * varying levels of bug fixes on top. Usually it's a good idea - * to use this generic version instead, but be careful to avoid - * ABI changes. - * New architectures should not provide their own version. - */ - -#include -#include - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ - -#include - -/* intr=^C quit=^\ erase=del kill=^U - eof=^D vtime=\0 vmin=\1 sxtc=\0 - start=^Q stop=^S susp=^Z eol=\0 - reprint=^R discard=^U werase=^W lnext=^V - eol2=\0 -*/ -#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" - -/* - * Translate a "termio" structure into a "termios". Ugh. 
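(The conversion helpers that follow exist for the legacy TCGETA/TCSETA pair, which moves struct termio with its 16-bit flag words. A runnable user-space sketch of that path; only bits 0-15 of each flag word exist in struct termio, which is why the kernel-side merge must preserve the upper halves that were set via TCSETS.)

#include <stdio.h>
#include <termio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        struct termio t;

        if (ioctl(STDIN_FILENO, TCGETA, &t) < 0) {
                perror("TCGETA");
                return 1;
        }
        t.c_lflag &= ~ECHO;     /* toggle something in the low 16 bits */
        if (ioctl(STDIN_FILENO, TCSETA, &t) < 0) {
                perror("TCSETA");
                return 1;
        }
        t.c_lflag |= ECHO;      /* and restore it */
        return ioctl(STDIN_FILENO, TCSETA, &t) < 0;
}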
- */ -static inline int user_termio_to_kernel_termios(struct ktermios *termios, - const struct termio __user *termio) -{ - unsigned short tmp; - - if (get_user(tmp, &termio->c_iflag) < 0) - goto fault; - termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp; - - if (get_user(tmp, &termio->c_oflag) < 0) - goto fault; - termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp; - - if (get_user(tmp, &termio->c_cflag) < 0) - goto fault; - termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp; - - if (get_user(tmp, &termio->c_lflag) < 0) - goto fault; - termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp; - - if (get_user(termios->c_line, &termio->c_line) < 0) - goto fault; - - if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0) - goto fault; - - return 0; - - fault: - return -EFAULT; -} - -/* - * Translate a "termios" structure into a "termio". Ugh. - */ -static inline int kernel_termios_to_user_termio(struct termio __user *termio, - struct ktermios *termios) -{ - if (put_user(termios->c_iflag, &termio->c_iflag) < 0 || - put_user(termios->c_oflag, &termio->c_oflag) < 0 || - put_user(termios->c_cflag, &termio->c_cflag) < 0 || - put_user(termios->c_lflag, &termio->c_lflag) < 0 || - put_user(termios->c_line, &termio->c_line) < 0 || - copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0) - return -EFAULT; - - return 0; -} - -#ifdef TCGETS2 -static inline int user_termios_to_kernel_termios(struct ktermios *k, - struct termios2 __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios2)); -} - -static inline int kernel_termios_to_user_termios(struct termios2 __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios2)); -} - -static inline int user_termios_to_kernel_termios_1(struct ktermios *k, - struct termios __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios)); -} - -static inline int kernel_termios_to_user_termios_1(struct termios __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios)); -} -#else /* TCGETS2 */ -static inline int user_termios_to_kernel_termios(struct ktermios *k, - struct termios __user *u) -{ - return copy_from_user(k, u, sizeof(struct termios)); -} - -static inline int kernel_termios_to_user_termios(struct termios __user *u, - struct ktermios *k) -{ - return copy_to_user(u, k, sizeof(struct termios)); -} -#endif /* TCGETS2 */ - -#endif /* __KERNEL__ */ - -#endif /* _ASM_GENERIC_TERMIOS_H */ +#include diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index f2fe528e968..df1da20f453 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h @@ -3,7 +3,7 @@ #define dma_addr_t dma_addr_t -#include +#include #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h index 7cfc436f86d..b7c29c8017f 100644 --- a/arch/x86/include/asm/ucontext.h +++ b/arch/x86/include/asm/ucontext.h @@ -7,6 +7,6 @@ * sigcontext struct (uc_mcontext). 
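(struct ucontext is also the type the <ucontext.h> user API traffics in; a small runnable demonstration exercising the uc_stack and uc_link fields named here:)

#include <stdio.h>
#include <ucontext.h>

static ucontext_t main_ctx, co_ctx;
static char co_stack[64 * 1024];

static void coroutine(void)
{
        puts("hello from the coroutine");
        /* returning here follows uc_link back to main_ctx */
}

int main(void)
{
        getcontext(&co_ctx);
        co_ctx.uc_stack.ss_sp = co_stack;
        co_ctx.uc_stack.ss_size = sizeof(co_stack);
        co_ctx.uc_link = &main_ctx;
        makecontext(&co_ctx, coroutine, 0);
        swapcontext(&main_ctx, &co_ctx);
        puts("back in main");
        return 0;
}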
*/ -#include +#include #endif /* _ASM_X86_UCONTEXT_H */ -- cgit v1.2.3 From feaa0457ec8351cae855edc9a3052ac49322538e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 20 Jun 2009 16:15:40 +0530 Subject: x86: ds.c fix invalid assignment Fixes the type mixups that cause the following sparse warnings: CHECK arch/x86/kernel/ds.c arch/x86/kernel/ds.c:549:19: warning: incorrect type in argument 2 (invalid types) arch/x86/kernel/ds.c:549:19: expected bad type enum bts_field field arch/x86/kernel/ds.c:549:19: got int arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:514:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:514:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:514:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:514:7: error: invalid assignment arch/x86/kernel/ds.c:520:35: error: incompatible types for operation (*) arch/x86/kernel/ds.c:520:35: left side has type unsigned char static [unsigned] [toplevel] sizeof_ptr_field arch/x86/kernel/ds.c:520:35: right side has type bad type enum bts_field field arch/x86/kernel/ds.c:520:7: error: invalid assignment arch/x86/kernel/ds.c:520:35: error: incompatible types for operation (*) Signed-off-by: Jaswinder Singh Rajput Cc: Markus Metzger LKML-Reference: <1245494740.8613.12.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 48bfe138603..ef42a038f1a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -509,15 +509,15 @@ enum bts_field { bts_escape = ((unsigned long)-1 & ~bts_qual_mask) }; -static inline unsigned long bts_get(const char *base, enum bts_field field) +static inline unsigned long bts_get(const char *base, unsigned long field) { base += (ds_cfg.sizeof_ptr_field * field); return *(unsigned long *)base; } -static inline void 
bts_set(char *base, enum bts_field field, unsigned long val) +static inline void bts_set(char *base, unsigned long field, unsigned long val) { - base += (ds_cfg.sizeof_ptr_field * field);; + base += (ds_cfg.sizeof_ptr_field * field); (*(unsigned long *)base) = val; } -- cgit v1.2.3 From c9944881acf02b6f25fa62a0441a98b7dc0d7ae6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 24 Jun 2009 13:42:40 +0800 Subject: crypto: aes-ni - Don't print message with KERN_ERR on old system When the aes-intel module is loaded on a system that does not have the AES instructions, it prints Intel AES-NI instructions are not detected. at level KERN_ERR. Since aes-intel is aliased to "aes" it will be tried whenever anything uses AES and spam the console. This doesn't match existing practice for how to handle "no hardware" when initializing a module, so downgrade the message to KERN_INFO. Signed-off-by: Roland Dreier Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5ec1ca..d3ec8d588d4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -636,7 +636,7 @@ static int __init aesni_init(void) int err; if (!cpu_has_aes) { - printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); + printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); return -ENODEV; } if ((err = crypto_register_alg(&aesni_alg))) -- cgit v1.2.3 From 2495fbf7effa6868f5d74124ae9b22a57980755b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 26 Jun 2009 10:53:57 -0700 Subject: x86, setup: remove obsolete pre-Kconfig CONFIG_VIDEO_ variables There were a set of pre-Kconfig configuration variables defined in the video code. There is absolutely no evidence that they have been tweaked by anybody in modern history, so just get rid of them and hope nobody notices. If someone does complain, these should be made real Kconfig variables. Reported-by: Robert P. J. Day Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/video-vesa.c | 7 +------ arch/x86/boot/video-vga.c | 10 ---------- arch/x86/boot/video.c | 5 ----- arch/x86/boot/video.h | 20 ++------------------ 4 files changed, 3 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index c700147d6ff..d7ef26ba454 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -31,7 +31,6 @@ static inline void vesa_store_mode_params_graphics(void) {} static int vesa_probe(void) { -#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) struct biosregs ireg, oreg; u16 mode; addr_t mode_ptr; @@ -49,8 +48,7 @@ static int vesa_probe(void) vginfo.signature != VESA_MAGIC || vginfo.version < 0x0102) return 0; /* Not present */ -#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */ -#ifdef CONFIG_VIDEO_VESA + set_fs(vginfo.video_mode_ptr.seg); mode_ptr = vginfo.video_mode_ptr.off; @@ -102,9 +100,6 @@ static int vesa_probe(void) } return nmodes; -#else - return 0; -#endif /* CONFIG_VIDEO_VESA */ } static int vesa_set_mode(struct mode_info *mode) diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 8f8d827e254..819caa1f200 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -47,14 +47,6 @@ static u8 vga_set_basic_mode(void) initregs(&ireg); -#ifdef CONFIG_VIDEO_400_HACK - if (adapter >= ADAPTER_VGA) { - ireg.ax = 0x1202; - ireg.bx = 0x0030; - intcall(0x10, &ireg, NULL); - } -#endif - ax = 0x0f00; intcall(0x10, &ireg, &oreg); mode = oreg.al; @@ -62,11 +54,9 @@ static u8 vga_set_basic_mode(void) set_fs(0); rows = rdfs8(0x484); /* rows minus one */ -#ifndef CONFIG_VIDEO_400_HACK if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && (rows == 0 || rows == 24)) return mode; -#endif if (mode != 3 && mode != 7) mode = 3; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index bad728b76fc..d42da380249 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -221,7 +221,6 @@ static unsigned int mode_menu(void) } } -#ifdef CONFIG_VIDEO_RETAIN /* Save screen content to the heap */ static struct saved_screen { int x, y; @@ -299,10 +298,6 @@ static void restore_screen(void) ireg.dl = saved.curx; intcall(0x10, &ireg, NULL); } -#else -#define save_screen() ((void)0) -#define restore_screen() ((void)0) -#endif void set_video(void) { diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index 5bb174a997f..ff339c5db31 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -17,19 +17,8 @@ #include -/* Enable autodetection of SVGA adapters and modes. */ -#undef CONFIG_VIDEO_SVGA - -/* Enable autodetection of VESA modes */ -#define CONFIG_VIDEO_VESA - -/* Retain screen contents when switching modes */ -#define CONFIG_VIDEO_RETAIN - -/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */ -#undef CONFIG_VIDEO_400_HACK - -/* This code uses an extended set of video mode numbers. These include: +/* + * This code uses an extended set of video mode numbers. 
These include: * Aliases for standard modes * NORMAL_VGA (-1) * EXTENDED_VGA (-2) @@ -67,13 +56,8 @@ /* The "recalculate timings" flag */ #define VIDEO_RECALC 0x8000 -/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ -#ifdef CONFIG_VIDEO_RETAIN void store_screen(void); #define DO_STORE() store_screen() -#else -#define DO_STORE() ((void)0) -#endif /* CONFIG_VIDEO_RETAIN */ /* * Mode table structures -- cgit v1.2.3 From 087975b06b00af9bf888fab6f94ae113c5cd80bd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sat, 27 Jun 2009 15:35:15 +0900 Subject: x86: Clean up dump_pagetable() Use pgtable access helpers for 32-bit version dump_pagetable() and get rid of __typeof__() operators. This needs to make pmd_pfn() available for 2-level pgtable. Also, remove some casts for 64-bit version dump_pagetable(). Signed-off-by: Akinobu Mita LKML-Reference: <20090627063514.GA2834@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 10 ++++----- arch/x86/mm/fault.c | 51 +++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3cc06e3fceb..af5481e9486 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -134,6 +134,11 @@ static inline unsigned long pte_pfn(pte_t pte) return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) @@ -422,11 +427,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; -} - static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78a5fff857b..9bf7e52c286 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address, tsk->thread.screen_bitmap |= 1 << bit; } -static void dump_pagetable(unsigned long address) +static bool low_pfn(unsigned long pfn) { - __typeof__(pte_val(__pte(0))) page; + return pfn < max_low_pfn; +} - page = read_cr3(); - page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; +static void dump_pagetable(unsigned long address) +{ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(address)]; + pmd_t *pmd; + pte_t *pte; #ifdef CONFIG_X86_PAE - printk("*pdpt = %016Lx ", page); - if ((page >> PAGE_SHIFT) < max_low_pfn - && page & _PAGE_PRESENT) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; - printk(KERN_CONT "*pde = %016Lx ", page); - page &= ~_PAGE_NX; - } -#else - printk("*pde = %08lx ", page); + printk("*pdpt = %016Lx ", pgd_val(*pgd)); + if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) + goto out; #endif + pmd = pmd_offset(pud_offset(pgd, address), address); + printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); /* * We must not directly access the pte in the highpte @@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT) - 
&& !(page & _PAGE_PSE)) { - - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; - printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); - } + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + goto out; + pte = pte_offset_kernel(pmd, address); + printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); +out: printk("\n"); } @@ -449,16 +444,12 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long address) { - pgd_t *pgd; + pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd_t *pgd = base + pgd_index(address); pud_t *pud; pmd_t *pmd; pte_t *pte; - pgd = (pgd_t *)read_cr3(); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); - - pgd += pgd_index(address); if (bad_address(pgd)) goto bad; -- cgit v1.2.3 From ce0c0f9eec2f377055e8b23c6fa192202381e022 Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Sun, 28 Jun 2009 18:07:39 +0800 Subject: x86, pgtable.h: Clean up types Use "unsigned long" consistently, not "unsigned". Signed-off-by: Figo.zhang LKML-Reference: <1246183659.2530.4.camel@myhost> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af5481e9486..9de8729c1c8 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -356,7 +356,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * this macro returns the index of the entry in the pmd page which would * control the given virtual address */ -static inline unsigned pmd_index(unsigned long address) +static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } @@ -376,7 +376,7 @@ static inline unsigned pmd_index(unsigned long address) * this function returns the index of the entry in the pte page which would * control the given virtual address */ -static inline unsigned pte_index(unsigned long address) +static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } @@ -462,7 +462,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) /* to find an entry in a page-table-directory. */ -static inline unsigned pud_index(unsigned long address) +static inline unsigned long pud_index(unsigned long address) { return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); } -- cgit v1.2.3 From 565b0c1f100408ccbcb04ba458a14da454cb271d Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Mon, 29 Jun 2009 12:02:55 +0800 Subject: x86, highmem_32.c: Clean up comment Signed-off-by: Figo.zhang Cc: Andrew Morton LKML-Reference: <1246248175.5759.12.camel@myhost> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 58f621e8191..0c6f43cee25 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -24,7 +24,7 @@ void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. 
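(A sketch of the atomic-kmap pattern this comment describes, against the km_type-based API this tree still carries; nothing between the map and the unmap may sleep.)

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy into a (possibly highmem) page without sleeping. */
static void fill_page(struct page *page, const void *src, size_t len)
{
        char *vaddr = kmap_atomic(page, KM_USER0);

        memcpy(vaddr, src, len);        /* no blocking calls in this window */
        kunmap_atomic(vaddr, KM_USER0);
}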
*/ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) -- cgit v1.2.3 From 42204455f160dab0c47f19e1be23f5c927af2d17 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:50:00 +0530 Subject: x86: Clean up mtrr/amd.c: Fix trivial style problems : ERROR: trailing whitespace WARNING: line over 80 characters ERROR: do not use C99 // comments arch/x86/kernel/cpu/mtrr/amd.o: text data bss dec hex filename 501 32 0 533 215 amd.o.before 501 32 0 533 215 amd.o.after md5: 62f795eb840ee2d17b03df89e789e76c amd.o.before.asm 62f795eb840ee2d17b03df89e789e76c amd.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Also restructured comments to be standard, removed stray return, converted function description to DocBook style, etc. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/amd.c | 97 ++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index ee2331b0e58..33af14110df 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -7,15 +7,15 @@ static void amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) + unsigned long *size, mtrr_type *type) { unsigned long low, high; rdmsr(MSR_K6_UWCCR, low, high); - /* Upper dword is region 1, lower is region 0 */ + /* Upper dword is region 1, lower is region 0 */ if (reg == 1) low = high; - /* The base masks off on the right alignment */ + /* The base masks off on the right alignment */ *base = (low & 0xFFFE0000) >> PAGE_SHIFT; *type = 0; if (low & 1) @@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base, return; } /* - * This needs a little explaining. The size is stored as an - * inverted mask of bits of 128K granularity 15 bits long offset - * 2 bits + * This needs a little explaining. The size is stored as an + * inverted mask of bits of 128K granularity 15 bits long offset + * 2 bits. * - * So to get a size we do invert the mask and add 1 to the lowest - * mask bit (4 as its 2 bits in). This gives us a size we then shift - * to turn into 128K blocks + * So to get a size we do invert the mask and add 1 to the lowest + * mask bit (4 as its 2 bits in). This gives us a size we then shift + * to turn into 128K blocks. * - * eg 111 1111 1111 1100 is 512K + * eg 111 1111 1111 1100 is 512K * - * invert 000 0000 0000 0011 - * +1 000 0000 0000 0100 - * *128K ... + * invert 000 0000 0000 0011 + * +1 000 0000 0000 0100 + * *128K ... */ low = (~low) & 0x1FFFC; *size = (low + 4) << (15 - PAGE_SHIFT); - return; } -static void amd_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - [RETURNS] Nothing. -*/ +/** + * amd_set_mtrr - Set variable MTRR register on the local CPU. + * + * @reg The register to set. + * @base The base address of the region. + * @size The size of the region. If this is 0 the region is disabled. + * @type The type of the region. + * + * Returns nothing. 
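(The inverted-mask arithmetic in the comment above is easy to sanity-check outside the kernel. A user-space transcription of the K6 encode/decode steps, assuming 4K pages; sizes are in pages, as in the driver:)

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long k6_size_to_mask(unsigned long size_pages)
{
        /* ~(x - 1) == -x: two's complement yields the inverted mask */
        return (-size_pages >> (15 - PAGE_SHIFT)) & 0x0001FFFC;
}

static unsigned long k6_mask_to_size(unsigned long low)
{
        low = (~low) & 0x1FFFC;
        return (low + 4) << (15 - PAGE_SHIFT);
}

int main(void)
{
        unsigned long pages = 512 >> 2;         /* 512 KiB in 4K pages */
        unsigned long low = k6_size_to_mask(pages);

        printf("512K -> mask %#lx -> %lu pages\n", low, k6_mask_to_size(low));
        return 0;
}

(The printed mask is 0x1fff0, i.e. the 111 1111 1111 1100 pattern from the comment sitting in bits 2-16.)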
+ */ +static void +amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { u32 regs[2]; /* - * Low is MTRR0 , High MTRR 1 + * Low is MTRR0, High MTRR 1 */ rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); /* - * Blank to disable + * Blank to disable */ - if (size == 0) + if (size == 0) { regs[reg] = 0; - else - /* Set the register to the base, the type (off by one) and an - inverted bitmask of the size The size is the only odd - bit. We are fed say 512K We invert this and we get 111 1111 - 1111 1011 but if you subtract one and invert you get the - desired 111 1111 1111 1100 mask - - But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ + } else { + /* + * Set the register to the base, the type (off by one) and an + * inverted bitmask of the size The size is the only odd + * bit. We are fed say 512K We invert this and we get 111 1111 + * 1111 1011 but if you subtract one and invert you get the + * desired 111 1111 1111 1100 mask + * + * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! + */ regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) | (base << PAGE_SHIFT) | (type + 1); + } /* - * The writeback rule is quite specific. See the manual. Its - * disable local interrupts, write back the cache, set the mtrr + * The writeback rule is quite specific. See the manual. Its + * disable local interrupts, write back the cache, set the mtrr */ wbinvd(); wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); } -static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +static int +amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) { - /* Apply the K6 block alignment and size rules - In order - o Uncached or gathering only - o 128K or bigger block - o Power of 2 block - o base suitably aligned to the power - */ + /* + * Apply the K6 block alignment and size rules + * In order + * o Uncached or gathering only + * o 128K or bigger block + * o Power of 2 block + * o base suitably aligned to the power + */ if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) || (size & ~(size - 1)) - size || (base & (size - 1))) return -EINVAL; @@ -115,5 +122,3 @@ int __init amd_init_mtrr(void) set_mtrr_ops(&amd_mtrr_ops); return 0; } - -//arch_initcall(amd_mtrr_init); -- cgit v1.2.3 From 6c4caa1ab737502190e416b76e6c10d2bf24276a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:50:44 +0530 Subject: x86: Clean up mtrr/centaur.c Remove dead code and fix trivial style problems: ERROR: trailing whitespace X 2 WARNING: line over 80 characters X 3 ROR: trailing whitespace ERROR: do not use C99 // comments X 2 arch/x86/kernel/cpu/mtrr/centaur.o: text data bss dec hex filename 605 32 68 705 2c1 centaur.o.before 605 32 68 705 2c1 centaur.o.after md5: a4865ea98ce3c163bb1d376a3949b3e3 centaur.o.before.asm a4865ea98ce3c163bb1d376a3949b3e3 centaur.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Standardized comments, DocBook, curly braces, newlines. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/centaur.c | 168 ++++++++----------------------------- 1 file changed, 35 insertions(+), 133 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index cb9aa3a7a7a..de89f14eff3 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -1,7 +1,9 @@ #include #include + #include #include + #include "mtrr.h" static struct { @@ -12,25 +14,25 @@ static struct { static u8 centaur_mcr_reserved; static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ -/* - * Report boot time MCR setups +/** + * centaur_get_free_region - Get a free MTRR. + * + * @base: The starting (base) address of the region. + * @size: The size (in bytes) of the region. + * + * Returns: the index of the region on success, else -1 on error. */ - static int centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ { - int i, max; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i, max; max = num_var_ranges; if (replace_reg >= 0 && replace_reg < max) return replace_reg; + for (i = 0; i < max; ++i) { if (centaur_mcr_reserved & (1 << i)) continue; @@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) if (lsize == 0) return i; } + return -ENOSPC; } -void -mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +/* + * Report boot time MCR setups + */ +void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { centaur_mcr[mcr].low = lo; centaur_mcr[mcr].high = hi; @@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base, { *base = centaur_mcr[reg].high >> PAGE_SHIFT; *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; - *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ + *type = MTRR_TYPE_WRCOMB; /* write-combining */ + if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) *type = MTRR_TYPE_UNCACHABLE; if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) *type = MTRR_TYPE_WRBACK; if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) *type = MTRR_TYPE_WRBACK; - } -static void centaur_set_mcr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void +centaur_set_mcr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) { unsigned long low, high; if (size == 0) { - /* Disable */ + /* Disable */ high = low = 0; } else { high = base << PAGE_SHIFT; - if (centaur_mcr_type == 0) - low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ - else { + if (centaur_mcr_type == 0) { + /* Only support write-combining... */ + low = -size << PAGE_SHIFT | 0x1f; + } else { if (type == MTRR_TYPE_UNCACHABLE) - low = -size << PAGE_SHIFT | 0x02; /* NC */ + low = -size << PAGE_SHIFT | 0x02; /* NC */ else - low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ + low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */ } } centaur_mcr[reg].high = high; @@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base, wrmsr(MSR_IDT_MCR0 + reg, low, high); } -#if 0 -/* - * Initialise the later (saner) Winchip MCR variant. 
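(centaur_get_mcr()/centaur_set_mcr() above use a simpler scheme than the K6 one: the size is stored negated and page-aligned, with a type key in the low bits, 0x1f being write-combining on the original WinChip. A user-space round trip, kept in 32-bit arithmetic to mirror the MSR width:)

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned int size_pages = 256;  /* 1 MiB in 4K pages */
        unsigned int low = (-size_pages << PAGE_SHIFT) | 0x1f;
        unsigned int back = -(low & 0xfffff000) >> PAGE_SHIFT;

        printf("low=%#x -> %u pages\n", low, back);
        return 0;
}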
In this version - * the BIOS can pass us the registers it has used (but not their values) - * and the control register is read/write - */ - -static void __init -centaur_mcr1_init(void) -{ - unsigned i; - u32 lo, hi; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ - lo &= ~0x1C0; /* clear key */ - lo |= 0x040; /* set key to 1 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ - } - - centaur_mcr_type = 1; - - /* - * Clear any unconfigured MCR's. - */ - - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { - if (!(lo & (1 << (9 + i)))) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - else - /* - * If the BIOS set up an MCR we cannot see it - * but we don't wish to obliterate it - */ - centaur_mcr_reserved |= (1 << i); - } - } - /* - * Throw the main write-combining switch... - * However if OOSTORE is enabled then people have already done far - * cleverer things and we should behave. - */ - - lo |= 15; /* Write combine enables */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -/* - * Initialise the original winchip with read only MCR registers - * no used bitmask for the BIOS to pass on and write only control - */ - -static void __init -centaur_mcr0_init(void) -{ - unsigned i; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - /* Clear any unconfigured MCR's. - * This way we are sure that the centaur_mcr array contains the actual - * values. The disadvantage is that any BIOS tweaks are thus undone. - * - */ - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - } - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ -} - -/* - * Initialise Winchip series MCR registers - */ - -static void __init -centaur_mcr_init(void) -{ - struct set_mtrr_context ctxt; - - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - if (boot_cpu_data.x86_model == 4) - centaur_mcr0_init(); - else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) - centaur_mcr1_init(); - - set_mtrr_done(&ctxt); -} -#endif - -static int centaur_validate_add_page(unsigned long base, - unsigned long size, unsigned int type) +static int +centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) { /* - * FIXME: Winchip2 supports uncached + * FIXME: Winchip2 supports uncached */ - if (type != MTRR_TYPE_WRCOMB && + if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - printk(KERN_WARNING - "mtrr: only write-combining%s supported\n", - centaur_mcr_type ? " and uncacheable are" - : " is"); + pr_warning("mtrr: only write-combining%s supported\n", + centaur_mcr_type ? 
" and uncacheable are" : " is"); return -EINVAL; } return 0; @@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base, static struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, -// .init = centaur_mcr_init, .set = centaur_set_mcr, .get = centaur_get_mcr, .get_free_region = centaur_get_free_region, @@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void) set_mtrr_ops(¢aur_mtrr_ops); return 0; } - -//arch_initcall(centaur_init_mtrr); -- cgit v1.2.3 From 63f9600fadb10ea739108ae93e3e842d9843c58b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:51:32 +0530 Subject: x86: Clean up mtrr/cleanup.c Fix trivial style problems: WARNING: Use #include instead of WARNING: Use #include instead of Also, nr_mtrr_spare_reg should be unsigned long. arch/x86/kernel/cpu/mtrr/cleanup.o: text data bss dec hex filename 6241 8992 2056 17289 4389 cleanup.o.before 6241 8992 2056 17289 4389 cleanup.o.after The md5 has changed: 1a7a27513aef1825236daf29110fe657 cleanup.o.before.asm bcea358efa2532b6020e338e158447af cleanup.o.after.asm Because a WARN_ON()'s __LINE__ value changed by 3 lines. Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Did lots of other cleanups to make the code look more consistent. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 350 +++++++++++++++++++------------------ 1 file changed, 176 insertions(+), 174 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 1d584a18a50..b8aba811b60 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -1,51 +1,52 @@ -/* MTRR (Memory Type Range Register) cleanup - - Copyright (C) 2009 Yinghai Lu - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - +/* + * MTRR (Memory Type Range Register) cleanup + * + * Copyright (C) 2009 Yinghai Lu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ #include #include #include #include #include -#include #include +#include +#include +#include +#include #include #include -#include -#include #include -#include + #include "mtrr.h" -/* should be related to MTRR_VAR_RANGES nums */ +/* Should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 struct res_range { - unsigned long start; - unsigned long end; + unsigned long start; + unsigned long end; }; static int __init -add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) +add_range(struct res_range *range, int nr_range, + unsigned long start, unsigned long end) { - /* out of slots */ + /* Out of slots: */ if (nr_range >= RANGE_NUM) return nr_range; @@ -58,12 +59,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start, } static int __init -add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, - unsigned long end) +add_range_with_merge(struct res_range *range, int nr_range, + unsigned long start, unsigned long end) { int i; - /* try to merge it with old one */ + /* Try to merge it with old one: */ for (i = 0; i < nr_range; i++) { unsigned long final_start, final_end; unsigned long common_start, common_end; @@ -84,7 +85,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, return nr_range; } - /* need to add that */ + /* Need to add it: */ return add_range(range, nr_range, start, end); } @@ -117,7 +118,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) } if (start > range[j].start && end < range[j].end) { - /* find the new spare */ + /* Find the new spare: */ for (i = 0; i < RANGE_NUM; i++) { if (range[i].end == 0) break; @@ -147,13 +148,19 @@ static int __init cmp_range(const void *x1, const void *x2) } struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; }; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + static int __initdata debug_print; +#define Dprintk(x...) 
do { if (debug_print) printk(KERN_DEBUG x); } while (0) + + +#define BIOS_BUG_MSG KERN_WARNING \ + "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, @@ -180,7 +187,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, range[i].start, range[i].end + 1); } - /* take out UC ranges */ + /* Take out UC ranges: */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; if (type != MTRR_TYPE_UNCACHABLE && @@ -244,10 +251,9 @@ static int __initdata nr_range; static unsigned long __init sum_ranges(struct res_range *range, int nr_range) { - unsigned long sum; + unsigned long sum = 0; int i; - sum = 0; for (i = 0; i < nr_range; i++) sum += range[i].end + 1 - range[i].start; @@ -288,7 +294,7 @@ struct var_mtrr_state { static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) + unsigned char type, unsigned int address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -301,7 +307,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, mask = (1ULL << address_bits) - 1; mask &= ~((((u64)sizek) << 10) - 1); - base = ((u64)basek) << 10; + base = ((u64)basek) << 10; base |= type; mask |= 0x800; @@ -317,15 +323,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, static void __init save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type) + unsigned char type) { range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); range_state[reg].type = type; } -static void __init -set_var_mtrr_all(unsigned int address_bits) +static void __init set_var_mtrr_all(unsigned int address_bits) { unsigned long basek, sizek; unsigned char type; @@ -342,11 +347,11 @@ set_var_mtrr_all(unsigned int address_bits) static unsigned long to_size_factor(unsigned long sizek, char *factorp) { - char factor; unsigned long base = sizek; + char factor; if (base & ((1<<10) - 1)) { - /* not MB alignment */ + /* Not MB-aligned: */ factor = 'K'; } else if (base & ((1<<20) - 1)) { factor = 'M'; @@ -372,11 +377,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long max_align, align; unsigned long sizek; - /* Compute the maximum size I can make a range */ + /* Compute the maximum size with which we can make a range: */ if (range_startk) max_align = ffs(range_startk) - 1; else max_align = 32; + align = fls(range_sizek) - 1; if (align > max_align) align = max_align; @@ -386,11 +392,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; - start_base = to_size_factor(range_startk, - &start_factor), - size_base = to_size_factor(sizek, &size_factor), + start_base = to_size_factor(range_startk, &start_factor); + size_base = to_size_factor(sizek, &size_factor); - printk(KERN_DEBUG "Setting variable MTRR %d, " + Dprintk("Setting variable MTRR %d, " "base: %ld%cB, range: %ld%cB, type %s\n", reg, start_base, start_factor, size_base, size_factor, @@ -425,10 +430,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; - /* align with gran size, prevent small block used up MTRRs */ + /* Align with gran size, prevent small block used up MTRRs: */ range_basek = 
ALIGN(state->range_startk, gran_sizek); if ((range_basek > basek) && basek) return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); range_sizek = ALIGN(state->range_sizek, gran_sizek); @@ -439,22 +445,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } state->range_sizek = range_sizek; - /* try to append some small hole */ + /* Try to append some small hole: */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - /* no increase */ + /* No increase: */ if (range0_sizek == state->range_sizek) { - if (debug_print) - printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + state->range_sizek)<<10); + Dprintk("rangeX: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK); return 0; } - /* only cut back, when it is not the last */ + /* Only cut back when it is not the last: */ if (sizek) { while (range0_basek + range0_sizek > (basek + sizek)) { if (range0_sizek >= chunk_sizek) @@ -470,16 +475,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_try: range_basek = range0_basek + range0_sizek; - /* one hole in the middle */ + /* One hole in the middle: */ if (range_basek > basek && range_basek <= (basek + sizek)) second_sizek = range_basek - basek; if (range0_sizek > state->range_sizek) { - /* one hole in middle or at end */ + /* One hole in middle or at the end: */ hole_sizek = range0_sizek - state->range_sizek - second_sizek; - /* hole size should be less than half of range0 size */ + /* Hole size should be less than half of range0 size: */ if (hole_sizek >= (range0_sizek >> 1) && range0_sizek >= chunk_sizek) { range0_sizek -= chunk_sizek; @@ -491,32 +496,30 @@ second_try: } if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); + Dprintk("range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK); } if (range0_sizek < state->range_sizek) { - /* need to handle left over */ + /* Need to handle left over range: */ range_sizek = state->range_sizek - range0_sizek; - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); + Dprintk("range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, MTRR_TYPE_WRBACK); } if (hole_sizek) { hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); + Dprintk("hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, MTRR_TYPE_UNCACHABLE); } @@ -537,23 +540,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, basek = base_pfn << (PAGE_SHIFT - 10); sizek = size_pfn << (PAGE_SHIFT - 10); - /* See if I can merge with the last range */ + /* See if I can merge with the last range: */ if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { unsigned long endk = basek + sizek; state->range_sizek = endk - state->range_startk; return; } - /* Write the range mtrrs */ + /* Write the range mtrrs: */ if 
(state->range_sizek != 0) second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - /* Allocate an msr */ + /* Allocate an msr: */ state->range_startk = basek + second_sizek; state->range_sizek = sizek - second_sizek; } -/* mininum size of mtrr block that can take hole */ +/* Mininum size of mtrr block that can take hole: */ static u64 mtrr_chunk_size __initdata = (256ULL<<20); static int __init parse_mtrr_chunk_size_opt(char *p) @@ -565,7 +568,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) } early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); -/* granity of mtrr of block */ +/* Granularity of mtrr of block: */ static u64 mtrr_gran_size __initdata; static int __init parse_mtrr_gran_size_opt(char *p) @@ -577,7 +580,7 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static int nr_mtrr_spare_reg __initdata = +static unsigned long nr_mtrr_spare_reg __initdata = CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; static int __init parse_mtrr_spare_reg(char *arg) @@ -586,7 +589,6 @@ static int __init parse_mtrr_spare_reg(char *arg) nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); return 0; } - early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); static int __init @@ -594,8 +596,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; - int i; int num_reg; + int i; var_state.range_startk = 0; var_state.range_sizek = 0; @@ -605,17 +607,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, memset(range_state, 0, sizeof(range_state)); - /* Write the range etc */ - for (i = 0; i < nr_range; i++) + /* Write the range: */ + for (i = 0; i < nr_range; i++) { set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + } - /* Write the last range */ + /* Write the last range: */ if (var_state.range_sizek != 0) range_to_mtrr_with_hole(&var_state, 0, 0); num_reg = var_state.reg; - /* Clear out the extra MTRR's */ + /* Clear out the extra MTRR's: */ while (var_state.reg < num_var_ranges) { save_var_mtrr(var_state.reg, 0, 0, 0); var_state.reg++; @@ -625,11 +628,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, } struct mtrr_cleanup_result { - unsigned long gran_sizek; - unsigned long chunk_sizek; - unsigned long lose_cover_sizek; - unsigned int num_reg; - int bad; + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; }; /* @@ -645,10 +648,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static void __init print_out_mtrr_range_state(void) { - int i; char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; mtrr_type type; + int i; for (i = 0; i < num_var_ranges; i++) { @@ -676,10 +679,10 @@ static int __init mtrr_need_cleanup(void) int i; mtrr_type type; unsigned long size; - /* extra one for all 0 */ + /* Extra one for all 0: */ int num[MTRR_NUM_TYPES + 1]; - /* check entries number */ + /* Check entries number: */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; @@ -693,88 +696,86 @@ static int __init mtrr_need_cleanup(void) num[type]++; } - /* check if we got UC entries */ + /* Check if we got UC entries: */ if (!num[MTRR_TYPE_UNCACHABLE]) return 0; - /* check if we only had WB and UC */ + /* Check if we only had WB and UC */ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != - num_var_ranges - num[MTRR_NUM_TYPES]) + num_var_ranges - 
num[MTRR_NUM_TYPES]) return 0; return 1; } static unsigned long __initdata range_sums; -static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, - unsigned long extra_remove_base, - unsigned long extra_remove_size, - int i) + +static void __init +mtrr_calc_range_state(u64 chunk_size, u64 gran_size, + unsigned long x_remove_base, + unsigned long x_remove_size, int i) { - int num_reg; static struct res_range range_new[RANGE_NUM]; - static int nr_range_new; unsigned long range_sums_new; + static int nr_range_new; + int num_reg; - /* convert ranges to var ranges state */ - num_reg = x86_setup_var_mtrrs(range, nr_range, - chunk_size, gran_size); + /* Convert ranges to var ranges state: */ + num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - /* we got new setting in range_state, check it */ + /* We got new setting in range_state, check it: */ memset(range_new, 0, sizeof(range_new)); nr_range_new = x86_get_mtrr_mem_range(range_new, 0, - extra_remove_base, extra_remove_size); + x_remove_base, x_remove_size); range_sums_new = sum_ranges(range_new, nr_range_new); result[i].chunk_sizek = chunk_size >> 10; result[i].gran_sizek = gran_size >> 10; result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { - result[i].lose_cover_sizek = - (range_sums_new - range_sums) << PSHIFT; + result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; result[i].bad = 1; - } else - result[i].lose_cover_sizek = - (range_sums - range_sums_new) << PSHIFT; + } else { + result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; + } - /* double check it */ + /* Double check it: */ if (!result[i].bad && !result[i].lose_cover_sizek) { - if (nr_range_new != nr_range || - memcmp(range, range_new, sizeof(range))) - result[i].bad = 1; + if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; } - if (!result[i].bad && (range_sums - range_sums_new < - min_loss_pfn[num_reg])) { - min_loss_pfn[num_reg] = - range_sums - range_sums_new; - } + if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) + min_loss_pfn[num_reg] = range_sums - range_sums_new; } static void __init mtrr_print_out_one_result(int i) { - char gran_factor, chunk_factor, lose_factor; unsigned long gran_base, chunk_base, lose_base; + char gran_factor, chunk_factor, lose_factor; gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), - printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", - result[i].bad ? "*BAD*" : " ", - gran_base, gran_factor, chunk_base, chunk_factor); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", - result[i].num_reg, result[i].bad ? "-" : "", - lose_base, lose_factor); + + pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad ? "*BAD*" : " ", + gran_base, gran_factor, chunk_base, chunk_factor); + pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad ? 
"-" : "", + lose_base, lose_factor); } static int __init mtrr_search_optimal_index(void) { - int i; int num_reg_good; int index_good; + int i; if (nr_mtrr_spare_reg >= num_var_ranges) nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { if (!min_loss_pfn[i]) @@ -796,24 +797,24 @@ static int __init mtrr_search_optimal_index(void) return index_good; } - int __init mtrr_cleanup(unsigned address_bits) { - unsigned long extra_remove_base, extra_remove_size; + unsigned long x_remove_base, x_remove_size; unsigned long base, size, def, dummy; - mtrr_type type; u64 chunk_size, gran_size; + mtrr_type type; int index_good; int i; if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) return 0; + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* get it and store it aside */ + /* Get it and store it aside: */ memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -822,29 +823,28 @@ int __init mtrr_cleanup(unsigned address_bits) range_state[i].type = type; } - /* check if we need handle it and can handle it */ + /* Check if we need handle it and can handle it: */ if (!mtrr_need_cleanup()) return 0; - /* print original var MTRRs at first, for debugging: */ + /* Print original var MTRRs at first, for debugging: */ printk(KERN_DEBUG "original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); - extra_remove_size = 0; - extra_remove_base = 1 << (32 - PAGE_SHIFT); + x_remove_size = 0; + x_remove_base = 1 << (32 - PAGE_SHIFT); if (mtrr_tom2) - extra_remove_size = - (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, - extra_remove_size); + x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; + + nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); /* - * [0, 1M) should always be coverred by var mtrr with WB - * and fixed mtrrs should take effective before var mtrr for it + * [0, 1M) should always be covered by var mtrr with WB + * and fixed mtrrs should take effect before var mtrr for it: */ nr_range = add_range_with_merge(range, nr_range, 0, (1ULL<<(20 - PAGE_SHIFT)) - 1); - /* sort the ranges */ + /* Sort the ranges: */ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); @@ -854,7 +854,7 @@ int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { i = 0; mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, - extra_remove_base, extra_remove_size, i); + x_remove_base, x_remove_size, i); mtrr_print_out_one_result(i); @@ -880,7 +880,7 @@ int __init mtrr_cleanup(unsigned address_bits) continue; mtrr_calc_range_state(chunk_size, gran_size, - extra_remove_base, extra_remove_size, i); + x_remove_base, x_remove_size, i); if (debug_print) { mtrr_print_out_one_result(i); printk(KERN_INFO "\n"); @@ -890,7 +890,7 @@ int __init mtrr_cleanup(unsigned address_bits) } } - /* try to find the optimal index */ + /* Try to find the optimal index: */ index_good = mtrr_search_optimal_index(); if (index_good != -1) { @@ -898,7 +898,7 @@ int __init mtrr_cleanup(unsigned address_bits) i = index_good; mtrr_print_out_one_result(i); - /* convert ranges to var ranges state */ + /* Convert ranges to var ranges state: */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; gran_size = result[i].gran_sizek; @@ -941,8 +941,8 @@ early_param("disable_mtrr_trim", 
disable_mtrr_trim_setup); * Note this won't check if the MTRRs < 4GB where the magic bit doesn't * apply to are wrong, but so far we don't know of any such case in the wild. */ -#define Tom2Enabled (1U << 21) -#define Tom2ForceMemTypeWB (1U << 22) +#define Tom2Enabled (1U << 21) +#define Tom2ForceMemTypeWB (1U << 22) int __init amd_special_default_mtrr(void) { @@ -952,7 +952,7 @@ int __init amd_special_default_mtrr(void) return 0; if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) return 0; - /* In case some hypervisor doesn't pass SYSCFG through */ + /* In case some hypervisor doesn't pass SYSCFG through: */ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) return 0; /* @@ -965,19 +965,21 @@ int __init amd_special_default_mtrr(void) return 0; } -static u64 __init real_trim_memory(unsigned long start_pfn, - unsigned long limit_pfn) +static u64 __init +real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) { u64 trim_start, trim_size; + trim_start = start_pfn; trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; - return e820_update_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); + return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); } + /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -985,7 +987,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn, * Some buggy BIOSes don't setup the MTRRs properly for systems with certain * memory configurations. This routine checks that the highest MTRR matches * the end of memory, to make sure the MTRRs having a write back type cover - * all of the memory the kernel is intending to use. If not, it'll trim any + * all of the memory the kernel is intending to use. If not, it'll trim any * memory off the end by adjusting end_pfn, removing it from the kernel's * allocation pools, warning the user with an obnoxious message. */ @@ -994,21 +996,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; u64 total_trim_size; - /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; + /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: */ if (!is_cpu(INTEL) || disable_mtrr_trim) return 0; + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* get it and store it aside */ + /* Get it and store it aside: */ memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -1017,7 +1020,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) range_state[i].type = type; } - /* Find highest cached pfn */ + /* Find highest cached pfn: */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) @@ -1028,13 +1031,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) highest_pfn = base + size; } - /* kvm/qemu doesn't have mtrr set right, don't trim them all */ + /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ if (!highest_pfn) { printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); return 0; } - /* check entries number */ + /* Check entries number: */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; @@ -1046,11 +1049,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) num[type]++; } - /* no entry for WB? */ + /* No entry for WB? 
*/ if (!num[MTRR_TYPE_WRBACK]) return 0; - /* check if we only had WB and UC */ + /* Check if we only had WB and UC: */ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != num_var_ranges - num[MTRR_NUM_TYPES]) return 0; @@ -1066,31 +1069,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + /* Check the head: */ total_trim_size = 0; - /* check the head */ if (range[0].start) total_trim_size += real_trim_memory(0, range[0].start); - /* check the holes */ + + /* Check the holes: */ for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) total_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); } - /* check the top */ + + /* Check the top: */ i = nr_range - 1; if (range[i].end + 1 < end_pfn) total_trim_size += real_trim_memory(range[i].end + 1, end_pfn); if (total_trim_size) { - printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %lluMB of RAM.\n", - total_trim_size >> 20); + pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); if (!changed_by_mtrr_cleanup) WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr\n"); + pr_info("update e820 for mtrr\n"); update_e820(); return 1; @@ -1098,4 +1101,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - -- cgit v1.2.3 From 2311037708c170977506fbcbe0a2ba0c6d221940 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:52:08 +0530 Subject: x86: Clean up mtrr/cyrix.c Fix trivial style problems: WARNING: Use #include instead of WARNING: line over 80 characters ERROR: do not initialise statics to 0 or NULL ERROR: space prohibited after that open parenthesis '(' X 2 ERROR: space prohibited before that close parenthesis ')' X 2 ERROR: trailing whitespace X 2 ERROR: trailing statements should be on next line ERROR: do not use C99 // comments X 2 arch/x86/kernel/cpu/mtrr/cyrix.o: text data bss dec hex filename 1637 32 8 1677 68d cyrix.o.before 1637 32 8 1677 68d cyrix.o.after md5: 6f52abd06905be3f4cabb5239f9b0ff0 cyrix.o.before.asm 6f52abd06905be3f4cabb5239f9b0ff0 cyrix.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Made the code more consistent ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cyrix.c | 94 ++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index ff14c320040..228d982ce09 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -1,38 +1,40 @@ #include +#include #include -#include -#include -#include + #include #include +#include +#include + #include "mtrr.h" static void cyrix_get_arr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type * type) { - unsigned long flags; unsigned char arr, ccr3, rcr, shift; + unsigned long flags; arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - /* Save flags and disable interrupts */ local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ((unsigned char *) base)[3] = getCx86(arr); - ((unsigned char *) base)[2] = getCx86(arr + 1); - ((unsigned char *) base)[1] = getCx86(arr + 2); + ((unsigned char *)base)[3] = getCx86(arr); + ((unsigned char *)base)[2] = getCx86(arr + 
1); + ((unsigned char *)base)[1] = getCx86(arr + 2); rcr = getCx86(CX86_RCR_BASE + reg); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - /* Enable interrupts if it was enabled previously */ local_irq_restore(flags); + shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; - /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 + /* + * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 * Note: shift==0xf means 4G, this is unsupported. */ if (shift) @@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, } } +/* + * cyrix_get_free_region - get a free ARR. + * + * @base: the starting (base) address of the region. + * @size: the size (in bytes) of the region. + * + * Returns: the index of the region on success, else -1 on error. +*/ static int cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free ARR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ { - int i; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i; switch (replace_reg) { case 7: @@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) cyrix_get_arr(7, &lbase, &lsize, &ltype); if (lsize == 0) return 7; - /* Else try ARR0-ARR6 first */ + /* Else try ARR0-ARR6 first */ } else { for (i = 0; i < 7; i++) { cyrix_get_arr(i, &lbase, &lsize, &ltype); if (lsize == 0) return i; } - /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ + /* + * ARR0-ARR6 isn't free + * try ARR7 but its size must be at least 256K + */ cyrix_get_arr(i, &lbase, &lsize, &ltype); if ((lsize == 0) && (size >= 0x40)) return i; @@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) return -ENOSPC; } -static u32 cr4 = 0; -static u32 ccr3; +static u32 cr4, ccr3; static void prepare_set(void) { u32 cr0; /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { + if (cpu_has_pge) { cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); } - /* Disable and flush caches. Note that wbinvd flushes the TLBs as - a side-effect */ + /* + * Disable and flush caches.
+ * Note that wbinvd flushes the TLBs as a side-effect + */ cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); @@ -147,22 +156,21 @@ static void prepare_set(void) /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); - } static void post_set(void) { - /* Flush caches and TLBs */ + /* Flush caches and TLBs */ wbinvd(); /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ccr3); - - /* Enable caches */ + + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ - if ( cpu_has_pge ) + /* Restore value of CR4 */ + if (cpu_has_pge) write_cr4(cr4); } @@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, size >>= 6; size &= 0x7fff; /* make sure arr_size <= 14 */ - for (arr_size = 0; size; arr_size++, size >>= 1) ; + for (arr_size = 0; size; arr_size++, size >>= 1) + ; if (reg < 7) { switch (type) { @@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, prepare_set(); base <<= PAGE_SHIFT; - setCx86(arr, ((unsigned char *) &base)[3]); - setCx86(arr + 1, ((unsigned char *) &base)[2]); - setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); + setCx86(arr + 0, ((unsigned char *)&base)[3]); + setCx86(arr + 1, ((unsigned char *)&base)[2]); + setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size); setCx86(CX86_RCR_BASE + reg, arr_type); post_set(); } typedef struct { - unsigned long base; - unsigned long size; - mtrr_type type; + unsigned long base; + unsigned long size; + mtrr_type type; } arr_state_t; static arr_state_t arr_state[8] = { @@ -247,16 +256,17 @@ static void cyrix_set_all(void) setCx86(CX86_CCR0 + i, ccr_state[i]); for (; i < 7; i++) setCx86(CX86_CCR4 + i, ccr_state[i]); - for (i = 0; i < 8; i++) - cyrix_set_arr(i, arr_state[i].base, + + for (i = 0; i < 8; i++) { + cyrix_set_arr(i, arr_state[i].base, arr_state[i].size, arr_state[i].type); + } post_set(); } static struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, -// .init = cyrix_arr_init, .set_all = cyrix_set_all, .set = cyrix_set_arr, .get = cyrix_get_arr, @@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void) set_mtrr_ops(&cyrix_mtrr_ops); return 0; } - -//arch_initcall(cyrix_init_mtrr); -- cgit v1.2.3 From a1a499a39911fcfecbebaba1f38588088909f918 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:53:00 +0530 Subject: x86: Clean up mtrr/generic.c Fix the following trivial style problems: ERROR: trailing whitespace X 4 WARNING: Use #include instead of WARNING: braces {} are not necessary for single statement blocks X 3 ERROR: "foo * bar" should be "foo *bar" WARNING: line over 80 characters X 6 ERROR: "foo * bar" should be "foo *bar" ERROR: spaces required around that '=' (ctx:VxO) ERROR: space required before that '-' (ctx:OxV) WARNING: suspect code indent for conditional statements (8, 12) ERROR: spaces required around that '=' (ctx:VxV) ERROR: do not initialise statics to 0 or NULL ERROR: space prohibited after that open parenthesis '(' X 2 ERROR: space prohibited before that close parenthesis ')' X 2 ERROR: trailing statements should be on next line ERROR: return is not a function, parentheses are not required Also use pr_debug and pr_warning where possible.
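[ A minimal sketch of how these pr_*() helpers behave; this is illustrative only and not part of the patch series, and the function name below is invented. pr_debug() compiles to a real printk(KERN_DEBUG ...) only when DEBUG is defined before the includes (or CONFIG_DYNAMIC_DEBUG is enabled), which is why the diff below adds "#define DEBUG" at the top of generic.c:

#define DEBUG				/* must precede the includes */
#include <linux/kernel.h>

static void pr_usage_sketch(void)	/* hypothetical, for illustration */
{
	/* Emitted at KERN_DEBUG because DEBUG is defined above: */
	pr_debug("MTRR default type: %s\n", "write-back");
	/* Always compiled in, tagged KERN_WARNING: */
	pr_warning("mtrr: your CPUs had inconsistent MTRR settings\n");
}
]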
arch/x86/kernel/cpu/mtrr/generic.o: text data bss dec hex filename 5652 77 4224 9953 26e1 generic.o.before 5652 77 4220 9949 26dd generic.o.after The md5 changed: b34d6c045f06daa4ed092b90cc760e8f generic.o.before.asm a490c6251cfd8442fbffecc0e09a573d generic.o.after.asm Because mtrr_state moved from data to bss, changing its offsets - and also because __LINE__ numbers changed. Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ Further cleanups to make the code more consistent ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 304 +++++++++++++++++++++---------------- 1 file changed, 169 insertions(+), 135 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69f0b2..55da0c5f68d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -1,28 +1,34 @@ -/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong - because MTRRs can span upto 40 bits (36bits on most modern x86) */ +/* + * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong + * because MTRRs can span upto 40 bits (36bits on most modern x86) + */ +#define DEBUG + +#include #include #include +#include #include -#include -#include -#include -#include -#include -#include + #include +#include #include +#include +#include +#include #include + #include "mtrr.h" struct fixed_range_block { - int base_msr; /* start address of an MTRR block */ - int ranges; /* number of MTRRs in this block */ + int base_msr; /* start address of an MTRR block */ + int ranges; /* number of MTRRs in this block */ }; static struct fixed_range_block fixed_range_blocks[] = { - { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ - { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ - { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ + { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ + { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ + { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ {} }; @@ -30,10 +36,10 @@ static unsigned long smp_changes_mask; static int mtrr_state_set; u64 mtrr_tom2; -struct mtrr_state_type mtrr_state = {}; +struct mtrr_state_type mtrr_state; EXPORT_SYMBOL_GPL(mtrr_state); -/** +/* * BIOS is expected to clear MtrrFixDramModEn bit, see for example * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" (26094 Rev. 
3.30 February 2006), section @@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!(mtrr_state.enabled & 2)) { + if (!(mtrr_state.enabled & 2)) return mtrr_state.def_type; - } prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { @@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) if (start_state != end_state) return 0xFE; - if ((start & mask) != (base & mask)) { + if ((start & mask) != (base & mask)) continue; - } curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; if (prev_match == 0xFF) { @@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) curr_match = MTRR_TYPE_WRTHROUGH; } - if (prev_match != curr_match) { + if (prev_match != curr_match) return MTRR_TYPE_UNCACHABLE; - } } if (mtrr_tom2) { @@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end) return mtrr_state.def_type; } -/* Get the MSR pair relating to a var range */ +/* Get the MSR pair relating to a var range */ static void get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) { @@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } -/* fill the MSR pair relating to a var range */ +/* Fill the MSR pair relating to a var range */ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) { @@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index, vr[index].mask_hi = mask_hi; } -static void -get_fixed_ranges(mtrr_type * frs) +static void get_fixed_ranges(mtrr_type *frs) { - unsigned int *p = (unsigned int *) frs; + unsigned int *p = (unsigned int *)frs; int i; k8_check_syscfg_dram_mod_en(); @@ -217,22 +219,22 @@ static void __init print_fixed_last(void) if (!last_fixed_end) return; - printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, - last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); + pr_debug(" %05X-%05X %s\n", last_fixed_start, + last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); last_fixed_end = 0; } static void __init update_fixed_last(unsigned base, unsigned end, - mtrr_type type) + mtrr_type type) { last_fixed_start = base; last_fixed_end = end; last_fixed_type = type; } -static void __init print_fixed(unsigned base, unsigned step, - const mtrr_type *types) +static void __init +print_fixed(unsigned base, unsigned step, const mtrr_type *types) { unsigned i; @@ -259,54 +261,55 @@ static void __init print_mtrr_state(void) unsigned int i; int high_width; - printk(KERN_DEBUG "MTRR default type: %s\n", - mtrr_attrib_to_str(mtrr_state.def_type)); + pr_debug("MTRR default type: %s\n", + mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); + pr_debug("MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + print_fixed(0x80000 + i * 0x20000, 0x04000, + mtrr_state.fixed_ranges + (i + 1) * 8); for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + print_fixed(0xC0000 + i * 0x08000, 0x01000, + mtrr_state.fixed_ranges + (i + 3) * 8); /* tail */ print_fixed_last(); } - printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? 
"en" : "dis"); + pr_debug("MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); if (size_or_mask & 0xffffffffUL) high_width = ffs(size_or_mask & 0xffffffffUL) - 1; else high_width = ffs(size_or_mask>>32) + 32 - 1; high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); else - printk(KERN_DEBUG " %u disabled\n", i); - } - if (mtrr_tom2) { - printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", - mtrr_tom2, mtrr_tom2>>20); + pr_debug(" %u disabled\n", i); } + if (mtrr_tom2) + pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } -/* Grab all of the MTRR state for this CPU into *state */ +/* Grab all of the MTRR state for this CPU into *state */ void __init get_mtrr_state(void) { - unsigned int i; struct mtrr_var_range *vrs; - unsigned lo, dummy; unsigned long flags; + unsigned lo, dummy; + unsigned int i; vrs = mtrr_state.var_ranges; @@ -324,6 +327,7 @@ void __init get_mtrr_state(void) if (amd_special_default_mtrr()) { unsigned low, high; + /* TOP_MEM2 */ rdmsr(MSR_K8_TOP_MEM2, low, high); mtrr_tom2 = high; @@ -344,10 +348,9 @@ void __init get_mtrr_state(void) post_set(); local_irq_restore(flags); - } -/* Some BIOS's are fucked and don't set all MTRRs the same! */ +/* Some BIOS's are messed up and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { unsigned long mask = smp_changes_mask; @@ -355,28 +358,33 @@ void __init mtrr_state_warn(void) if (!mask) return; if (mask & MTRR_CHANGE_MASK_FIXED) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); + pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); printk(KERN_INFO "mtrr: corrected configuration.\n"); } -/* Doesn't attempt to pass an error out to MTRR users - because it's quite complicated in some cases and probably not - worth it because the best error handling is to ignore it. */ +/* + * Doesn't attempt to pass an error out to MTRR users + * because it's quite complicated in some cases and probably not + * worth it because the best error handling is to ignore it. 
+ */ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) { - if (wrmsr_safe(msr, a, b) < 0) + if (wrmsr_safe(msr, a, b) < 0) { printk(KERN_ERR "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", smp_processor_id(), msr, a, b); + } } /** - * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have + * set_fixed_range - checks & updates a fixed-range MTRR if it + * differs from the value it should have * @msr: MSR address of the MTTR which should be checked and updated * @changed: pointer which indicates whether the MTRR needed to be changed * @msrwords: pointer to the MSR values which the MSR should have @@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) * * Returns: The index of the region on success, else negative on error. */ -int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) +int +generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) { - int i, max; - mtrr_type ltype; unsigned long lbase, lsize; + mtrr_type ltype; + int i, max; max = num_var_ranges; if (replace_reg >= 0 && replace_reg < max) return replace_reg; + for (i = 0; i < max; ++i) { mtrr_if->get(i, &lbase, &lsize, &ltype); if (lsize == 0) return i; } + return -ENOSPC; } @@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { - /* Invalid (i.e. free) range */ + /* Invalid (i.e. free) range */ *base = 0; *size = 0; *type = 0; @@ -471,27 +482,31 @@ out_put_cpu: } /** - * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set + * set_fixed_ranges - checks & updates the fixed-range MTRRs if they + * differ from the saved set * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() */ -static int set_fixed_ranges(mtrr_type * frs) +static int set_fixed_ranges(mtrr_type *frs) { - unsigned long long *saved = (unsigned long long *) frs; + unsigned long long *saved = (unsigned long long *)frs; bool changed = false; - int block=-1, range; + int block = -1, range; k8_check_syscfg_dram_mod_en(); - while (fixed_range_blocks[++block].ranges) - for (range=0; range < fixed_range_blocks[block].ranges; range++) - set_fixed_range(fixed_range_blocks[block].base_msr + range, - &changed, (unsigned int *) saved++); + while (fixed_range_blocks[++block].ranges) { + for (range = 0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, + &changed, (unsigned int *)saved++); + } return changed; } -/* Set the MSR pair relating to a var range. Returns TRUE if - changes are made */ +/* + * Set the MSR pair relating to a var range. + * Returns true if changes are made.
+ */ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; @@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); changed = true; } @@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi; */ static unsigned long set_mtrr_state(void) { - unsigned int i; unsigned long change_mask = 0; + unsigned int i; - for (i = 0; i < num_var_ranges; i++) + for (i = 0; i < num_var_ranges; i++) { if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) change_mask |= MTRR_CHANGE_MASK_VARIABLE; + } if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) change_mask |= MTRR_CHANGE_MASK_FIXED; - /* Set_mtrr_restore restores the old value of MTRRdefType, - so to set it we fiddle with the saved value */ + /* + * Set_mtrr_restore restores the old value of MTRRdefType, + * so to set it we fiddle with the saved value: + */ if ((deftype_lo & 0xff) != mtrr_state.def_type || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); + + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | + (mtrr_state.enabled << 10); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } @@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void) } -static unsigned long cr4 = 0; +static unsigned long cr4; static DEFINE_SPINLOCK(set_atomicity_lock); /* - * Since we are disabling the cache don't allow any interrupts - they - * would run extremely slow and would only increase the pain. The caller must - * ensure that local interrupts are disabled and are reenabled after post_set() - * has been called. + * Since we are disabling the cache don't allow any interrupts, + * they would run extremely slow and would only increase the pain. + * + * The caller must ensure that local interrupts are disabled and + * are reenabled after post_set() has been called. */ - static void prepare_set(void) __acquires(set_atomicity_lock) { unsigned long cr0; - /* Note that this is not ideal, since the cache is only flushed/disabled - for this CPU while the MTRRs are changed, but changing this requires - more invasive changes to the way the kernel boots */ + /* + * Note that this is not ideal + * since the cache is only flushed/disabled for this CPU while the + * MTRRs are changed, but changing this requires more invasive + * changes to the way the kernel boots + */ spin_lock(&set_atomicity_lock); - /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ + /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. 
*/ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); wbinvd(); - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if (cpu_has_pge) { cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); } @@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ __flush_tlb(); - /* Save MTRR state */ + /* Save MTRR state */ rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - /* Disable MTRRs, and set the default type to uncached */ + /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); } static void post_set(void) __releases(set_atomicity_lock) { - /* Flush TLBs (no need to flush caches - they are disabled) */ + /* Flush TLBs (no need to flush caches - they are disabled) */ __flush_tlb(); /* Intel (P6) standard MTRRs */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); - - /* Enable caches */ + + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ - if ( cpu_has_pge ) + /* Restore value of CR4 */ + if (cpu_has_pge) write_cr4(cr4); spin_unlock(&set_atomicity_lock); } @@ -623,24 +647,27 @@ static void generic_set_all(void) post_set(); local_irq_restore(flags); - /* Use the atomic bitops to update the global mask */ + /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { if (mask & 0x01) set_bit(count, &smp_changes_mask); mask >>= 1; } - + } +/** + * generic_set_mtrr - set variable MTRR register on the local CPU. + * + * @reg: The register to set. + * @base: The base address of the region. + * @size: The size of the region. If this is 0 the region is disabled. + * @type: The type of the region. + * + * Returns nothing. + */ static void generic_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - [RETURNS] Nothing. -*/ { unsigned long flags; struct mtrr_var_range *vr; @@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, prepare_set(); if (size == 0) { - /* The invalid bit is kept in the mask, so we simply clear the - relevant mask register to disable a range. */ + /* + * The invalid bit is kept in the mask, so we simply + * clear the relevant mask register to disable a range. 
+ */ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); memset(vr, 0, sizeof(struct mtrr_var_range)); } else { @@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, local_irq_restore(flags); } -int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +int generic_validate_add_page(unsigned long base, unsigned long size, + unsigned int type) { unsigned long lbase, last; - /* For Intel PPro stepping <= 7, must be 4 MiB aligned - and not touch 0x70000000->0x7003FFFF */ + /* + * For Intel PPro stepping <= 7 + * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF + */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { - printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); return -EINVAL; } } - /* Check upper bits of base and last are equal and lower bits are 0 - for base and 1 for last */ + /* + * Check upper bits of base and last are equal and lower bits are 0 + * for base and 1 for last + */ last = base + size - 1; for (lbase = base; !(lbase & 1) && (last & 1); - lbase = lbase >> 1, last = last >> 1) ; + lbase = lbase >> 1, last = last >> 1) + ; if (lbase != last) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", - base, size); + pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); return -EINVAL; } return 0; } - static int generic_have_wrcomb(void) { unsigned long config, dummy; rdmsr(MSR_MTRRcap, config, dummy); - return (config & (1 << 10)); + return config & (1 << 10); } int positive_have_wrcomb(void) @@ -716,14 +749,15 @@ int positive_have_wrcomb(void) return 1; } -/* generic structure... +/* + * Generic structure... */ struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, - .set_all = generic_set_all, - .get = generic_get_mtrr, - .get_free_region = generic_get_free_region, - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, + .use_intel_if = 1, + .set_all = generic_set_all, + .get = generic_get_mtrr, + .get_free_region = generic_get_free_region, + .set = generic_set_mtrr, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = generic_have_wrcomb, }; -- cgit v1.2.3 From 26dc67eda19beafb7e5ef2770cec5b3ee5995a8e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:53:40 +0530 Subject: x86: Clean up mtrr/if.c Fix: WARNING: Use #include instead of ERROR: trailing whitespace X 7 ERROR: trailing statements should be on next line X 3 WARNING: line over 80 characters X 5 ERROR: space required before the open parenthesis '(' arch/x86/kernel/cpu/mtrr/if.o: text data bss dec hex filename 2239 4 0 2243 8c3 if.o.before 2239 4 0 2243 8c3 if.o.after md5: 78d1f2aa4843ec6509c18e2dee54bc7f if.o.before.asm 78d1f2aa4843ec6509c18e2dee54bc7f if.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More cleanups to make the code more consistent. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 135 ++++++++++++++++++++++++------------------ 1 file changed, 76 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index fb73a52913a..08b6ea4c62b 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -1,27 +1,28 @@ -#include -#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include #define LINE_SIZE 80 #include + #include "mtrr.h" #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) static const char *const mtrr_strings[MTRR_NUM_TYPES] = { - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ }; const char *mtrr_attrib_to_str(int x) @@ -35,8 +36,8 @@ static int mtrr_file_add(unsigned long base, unsigned long size, unsigned int type, bool increment, struct file *file, int page) { + unsigned int *fcount = FILE_FCOUNT(file); int reg, max; - unsigned int *fcount = FILE_FCOUNT(file); max = num_var_ranges; if (fcount == NULL) { @@ -61,8 +62,8 @@ static int mtrr_file_del(unsigned long base, unsigned long size, struct file *file, int page) { - int reg; unsigned int *fcount = FILE_FCOUNT(file); + int reg; if (!page) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) @@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size, return reg; } -/* RED-PEN: seq_file can seek now. this is ignored. */ +/* + * seq_file can seek but we ignore it. 
+ * + * Format of control line: + * "base=%Lx size=%Lx type=%s" or "disable=%d" + */ static ssize_t mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) -/* Format of control line: - "base=%Lx size=%Lx type=%s" OR: - "disable=%d" -*/ { int i, err; unsigned long reg; @@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EPERM; if (!len) return -EINVAL; + memset(line, 0, LINE_SIZE); if (len > LINE_SIZE) len = LINE_SIZE; if (copy_from_user(line, buf, len - 1)) return -EFAULT; + linelen = strlen(line); ptr = line + linelen - 1; if (linelen && *ptr == '\n') *ptr = '\0'; + if (!strncmp(line, "disable=", 8)) { reg = simple_strtoul(line + 8, &ptr, 0); err = mtrr_del_page(reg, 0, 0); @@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return err; return len; } + if (strncmp(line, "base=", 5)) return -EINVAL; + base = simple_strtoull(line + 5, &ptr, 0); - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + if (strncmp(ptr, "size=", 5)) return -EINVAL; + size = simple_strtoull(ptr + 5, &ptr, 0); if ((base & 0xfff) || (size & 0xfff)) return -EINVAL; - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + if (strncmp(ptr, "type=", 5)) return -EINVAL; ptr += 5; - for (; isspace(*ptr); ++ptr) ; + for (; isspace(*ptr); ++ptr) + ; + for (i = 0; i < MTRR_NUM_TYPES; ++i) { if (strcmp(ptr, mtrr_strings[i])) continue; base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; - err = - mtrr_add_page((unsigned long) base, (unsigned long) size, i, - true); + err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); if (err < 0) return err; return len; @@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) case MTRRIOC32_SET_PAGE_ENTRY: case MTRRIOC32_DEL_PAGE_ENTRY: case MTRRIOC32_KILL_PAGE_ENTRY: { - struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + struct mtrr_sentry32 __user *s32; + + s32 = (struct mtrr_sentry32 __user *)__arg; err = get_user(sentry.base, &s32->base); err |= get_user(sentry.size, &s32->size); err |= get_user(sentry.type, &s32->type); @@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) } case MTRRIOC32_GET_ENTRY: case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + struct mtrr_gentry32 __user *g32; + + g32 = (struct mtrr_gentry32 __user *)__arg; err = get_user(gentry.regnum, &g32->regnum); err |= get_user(gentry.base, &g32->base); err |= get_user(gentry.size, &g32->size); @@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) if (err) return err; - switch(cmd) { + switch (cmd) { case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: if (copy_to_user(arg, &gentry, sizeof gentry)) @@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_GET_ENTRY: case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + struct mtrr_gentry32 __user *g32; + + g32 = (struct mtrr_gentry32 __user *)__arg; err = put_user(gentry.base, &g32->base); err |= put_user(gentry.size, &g32->size); err |= put_user(gentry.regnum, &g32->regnum); @@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) return err; } -static int -mtrr_close(struct inode *ino, struct file *file) +static int mtrr_close(struct inode *ino, struct file *file) { 
- int i, max; unsigned int *fcount = FILE_FCOUNT(file); + int i, max; if (fcount != NULL) { max = num_var_ranges; @@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset); static int mtrr_open(struct inode *inode, struct file *file) { - if (!mtrr_if) + if (!mtrr_if) return -EIO; - if (!mtrr_if->get) - return -ENXIO; + if (!mtrr_if->get) + return -ENXIO; return single_open(file, mtrr_seq_show, NULL); } static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, }; static int mtrr_seq_show(struct seq_file *seq, void *offset) @@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) max = num_var_ranges; for (i = 0; i < max; i++) { mtrr_if->get(i, &base, &size, &type); - if (size == 0) + if (size == 0) { mtrr_usage_table[i] = 0; - else { - if (size < (0x100000 >> PAGE_SHIFT)) { - /* less than 1MB */ - factor = 'K'; - size <<= PAGE_SHIFT - 10; - } else { - factor = 'M'; - size >>= 20 - PAGE_SHIFT; - } - /* RED-PEN: base can be > 32bit */ - len += seq_printf(seq, - "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", - i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_usage_table[i], mtrr_attrib_to_str(type)); + continue; } + if (size < (0x100000 >> PAGE_SHIFT)) { + /* less than 1MB */ + factor = 'K'; + size <<= PAGE_SHIFT - 10; + } else { + factor = 'M'; + size >>= 20 - PAGE_SHIFT; + } + /* Base can be > 32bit */ + len += seq_printf(seq, "reg%02i: base=0x%06lx000 " + "(%5luMB), size=%5lu%cB, count=%d: %s\n", + i, base, base >> (20 - PAGE_SHIFT), size, + factor, mtrr_usage_table[i], + mtrr_attrib_to_str(type)); } return 0; } @@ -422,6 +440,5 @@ static int __init mtrr_if_init(void) proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); return 0; } - arch_initcall(mtrr_if_init); #endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 3ec8dbcb09bb6df83993ca03e88cb85e3aaa8edb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:54:16 +0530 Subject: x86: Clean up mtrr/mtrr.h Fix: ERROR: do not use C99 // comments ERROR: "foo * bar" should be "foo *bar" X 2 Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More tidyups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/mtrr.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 7538b767f20..a501dee9a87 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -1,5 +1,5 @@ /* - * local mtrr defines. + * local MTRR defines. 
*/ #include @@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; u32 use_intel_if; -// void (*init)(void); void (*set)(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); void (*set_all)(void); void (*get)(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type); + unsigned long *size, mtrr_type *type); int (*get_free_region)(unsigned long base, unsigned long size, int replace_reg); int (*validate_add_page)(unsigned long base, unsigned long size, @@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void); /* library functions for processor-specific routines */ struct set_mtrr_context { - unsigned long flags; - unsigned long cr4val; - u32 deftype_lo; - u32 deftype_hi; - u32 ccr3; + unsigned long flags; + unsigned long cr4val; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; }; void set_mtrr_done(struct set_mtrr_context *ctxt); @@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); -extern void set_mtrr_ops(struct mtrr_ops * ops); +extern void set_mtrr_ops(struct mtrr_ops *ops); extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops * mtrr_if; +extern struct mtrr_ops *mtrr_if; #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) -- cgit v1.2.3 From 09b22c85d59dd935fdfa71655a443785e3f99c18 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:54:53 +0530 Subject: x86: Clean up mtrr/state.c Fix: WARNING: Use #include instead of WARNING: line over 80 characters X 4 arch/x86/kernel/cpu/mtrr/state.o: text data bss dec hex filename 864 0 0 864 360 state.o.before 864 0 0 864 360 state.o.after md5: c5c4364b9aeac74d70111e1e49667a2c state.o.before.asm c5c4364b9aeac74d70111e1e49667a2c state.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> [ More cleanups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/state.c | 68 +++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 1f5fb1588d1..dfc80b4e6b0 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -1,24 +1,25 @@ -#include #include -#include -#include -#include +#include +#include + #include #include -#include "mtrr.h" +#include +#include +#include "mtrr.h" -/* Put the processor into a state where MTRRs can be safely set */ +/* Put the processor into a state where MTRRs can be safely set */ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) { unsigned int cr0; - /* Disable interrupts locally */ + /* Disable interrupts locally */ local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { - /* Save value of CR4 and clear Page Global Enable (bit 7) */ + /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (cpu_has_pge) { ctxt->cr4val = read_cr4(); write_cr4(ctxt->cr4val & ~X86_CR4_PGE); @@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) write_cr0(cr0); wbinvd(); - if (use_intel()) - /* Save MTRR state */ + if (use_intel()) { + /* Save MTRR state */ rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else were excluded at the top */ + } else { + /* + * Cyrix ARRs - + * everything else 
were excluded at the top + */ ctxt->ccr3 = getCx86(CX86_CCR3); + } } } void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { - if (use_intel()) - /* Disable MTRRs, and set the default type to uncached */ + if (use_intel()) { + /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); - else if (is_cpu(CYRIX)) - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); + } else { + if (is_cpu(CYRIX)) { + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); + } + } } -/* Restore the processor after a set_mtrr_prepare */ +/* Restore the processor after a set_mtrr_prepare */ void set_mtrr_done(struct set_mtrr_context *ctxt) { if (use_intel() || is_cpu(CYRIX)) { - /* Flush caches and TLBs */ + /* Flush caches and TLBs */ wbinvd(); - /* Restore MTRRdefType */ - if (use_intel()) + /* Restore MTRRdefType */ + if (use_intel()) { /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else was excluded at the top */ + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, + ctxt->deftype_hi); + } else { + /* + * Cyrix ARRs - + * everything else was excluded at the top + */ setCx86(CX86_CCR3, ctxt->ccr3); + } - /* Enable caches */ + /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); - /* Restore value of CR4 */ + /* Restore value of CR4 */ if (cpu_has_pge) write_cr4(ctxt->cr4val); } - /* Re-enable interrupts locally (if enabled previously) */ + /* Re-enable interrupts locally (if enabled previously) */ local_irq_restore(ctxt->flags); } - -- cgit v1.2.3 From dbd51be026eaf84088fdee7fab9f38fa92eef26d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 4 Jul 2009 07:56:28 +0530 Subject: x86: Clean up mtrr/main.c Fix following trivial style problems: ERROR: trailing whitespace X 25 WARNING: Use #include instead of WARNING: Use #include instead of ERROR: do not initialise externals to 0 or NULL X 2 ERROR: "foo * bar" should be "foo *bar" X 5 ERROR: do not use assignment in if condition X 2 WARNING: line over 80 characters X 8 ERROR: return is not a function, parentheses are not required WARNING: braces {} are not necessary for any arm of this statement ERROR: space required before the open parenthesis '(' X 2 ERROR: open brace '{' following function declarations go on the next line ERROR: space required after that ',' (ctx:VxV) X 8 ERROR: space required before the open parenthesis '(' X 3 ERROR: else should follow close brace '}' WARNING: space prohibited between function name and open parenthesis '(' WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable X 2 Also use pr_debug and pr_warning where possible. 
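The pr_debug()/pr_warning() change deserves one concrete illustration; roughly, the pattern is (a sketch, not a hunk from this patch):

/* Before: the level is spelled out at every call site, and
 * KERN_DEBUG messages are always compiled in. */
printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base);

/* After: pr_warning() expands to printk(KERN_WARNING ...), while
 * pr_debug() compiles away unless DEBUG is defined, which is why
 * main.c gains a "#define DEBUG" at the top of the file. */
pr_warning("mtrr: type: %u invalid\n", type);
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);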
total: 50 errors, 14 warnings arch/x86/kernel/cpu/mtrr/main.o: text data bss dec hex filename 3668 116 4156 7940 1f04 main.o.before 3668 116 4156 7940 1f04 main.o.after md5: e01af2fd28deef77c8d01e71acfbd365 main.o.before.asm e01af2fd28deef77c8d01e71acfbd365 main.o.after.asm Suggested-by: Alan Cox Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton Cc: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> Cc: Avi Kivity # Avi, please have a look at the kvm_para.h bit [ More cleanups ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 455 +++++++++++++++++++++------------------- 1 file changed, 242 insertions(+), 213 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8fc248b5aea..7af0f88a416 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -25,43 +25,48 @@ Operating System Writer's Guide" (Intel document number 242692), section 11.11.7 - This was cleaned and made readable by Patrick Mochel - on 6-7 March 2002. - Source: Intel Architecture Software Developers Manual, Volume 3: + This was cleaned and made readable by Patrick Mochel + on 6-7 March 2002. + Source: Intel Architecture Software Developers Manual, Volume 3: System Programming Guide; Section 9.11. (1997 edition - PPro). */ +#define DEBUG + +#include /* FIXME: kvm_para.h needs this */ + +#include +#include #include +#include #include +#include +#include #include #include -#include -#include -#include +#include #include #include -#include -#include #include -#include + #include "mtrr.h" -u32 num_var_ranges = 0; +u32 num_var_ranges; unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; -static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; +static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; -struct mtrr_ops * mtrr_if = NULL; +struct mtrr_ops *mtrr_if; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -void set_mtrr_ops(struct mtrr_ops * ops) +void set_mtrr_ops(struct mtrr_ops *ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) mtrr_ops[ops->vendor] = ops; @@ -72,30 +77,36 @@ static int have_wrcomb(void) { struct pci_dev *dev; u8 rev; - - if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets < rev 6 have problems with write-combining - Don't allow it and leave room for other chipsets to be tagged */ + + dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); + if (dev != NULL) { + /* + * ServerWorks LE chipsets < rev 6 have problems with + * write-combining. Don't allow it and leave room for other + * chipsets to be tagged + */ if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); if (rev <= 5) { - printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } } - /* Intel 450NX errata # 23. Non ascending cacheline evictions to - write combining memory may resulting in data corruption */ + /* + * Intel 450NX errata # 23. Non ascending cacheline evictions to + * write combining memory may resulting in data corruption + */ if (dev->vendor == PCI_VENDOR_ID_INTEL && dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - printk(KERN_INFO "mtrr: Intel 450NX MMC detected. 
Write-combining disabled.\n"); + pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } pci_dev_put(dev); - } - return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); + } + return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; } /* This function returns the number of variable MTRRs */ @@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void) { unsigned long config = 0, dummy; - if (use_intel()) { + if (use_intel()) rdmsr(MSR_MTRRcap, config, dummy); - } else if (is_cpu(AMD)) + else if (is_cpu(AMD)) config = 2; else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) config = 8; + num_var_ranges = config & 0xff; } @@ -130,10 +142,12 @@ struct set_mtrr_data { mtrr_type smp_type; }; +/** + * ipi_handler - Synchronisation handler. Executed by "other" CPUs. + * + * Returns nothing. + */ static void ipi_handler(void *info) -/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. - [RETURNS] Nothing. -*/ { #ifdef CONFIG_SMP struct set_mtrr_data *data = info; @@ -142,18 +156,19 @@ static void ipi_handler(void *info) local_irq_save(flags); atomic_dec(&data->count); - while(!atomic_read(&data->gate)) + while (!atomic_read(&data->gate)) cpu_relax(); /* The master has cleared me to execute */ - if (data->smp_reg != ~0U) - mtrr_if->set(data->smp_reg, data->smp_base, + if (data->smp_reg != ~0U) { + mtrr_if->set(data->smp_reg, data->smp_base, data->smp_size, data->smp_type); - else + } else { mtrr_if->set_all(); + } atomic_dec(&data->count); - while(atomic_read(&data->gate)) + while (atomic_read(&data->gate)) cpu_relax(); atomic_dec(&data->count); @@ -161,7 +176,8 @@ static void ipi_handler(void *info) #endif } -static inline int types_compatible(mtrr_type type1, mtrr_type type2) { +static inline int types_compatible(mtrr_type type1, mtrr_type type2) +{ return type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE || (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || @@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { * @type: mtrr type * * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: - * + * * 1. Send IPI to do the following: * 2. Disable Interrupts - * 3. Wait for all procs to do so + * 3. Wait for all procs to do so * 4. Enter no-fill cache mode * 5. Flush caches * 6. Clear PGE bit @@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { * 10. Enable all range registers * 11. Flush all TLBs and caches again * 12. Enter normal cache mode and reenable caching - * 13. Set PGE + * 13. Set PGE * 14. Wait for buddies to catch up * 15. Enable interrupts. - * + * * What does that mean for us? Well, first we set data.count to the number * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each - * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it - * differently, so we call mtrr_if->set() callback and let them take care of it. - * When they're done, they again decrement data->count and wait for data.gate to - * be reset. - * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * CPU goes through the transition of updating MTRRs. 
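The count/gate handshake described above is easier to follow stripped to its bones; condensed from ipi_handler() earlier in this file, the slave side is:

	local_irq_save(flags);

	atomic_dec(&data->count);		/* 1st: my IRQs are off */
	while (!atomic_read(&data->gate))
		cpu_relax();			/* wait for the master's go */

	/* ... reprogram this CPU's MTRRs via mtrr_if->set()/set_all() ... */

	atomic_dec(&data->count);		/* 2nd: my MTRRs are written */
	while (atomic_read(&data->gate))
		cpu_relax();			/* wait for the gate to close */

	atomic_dec(&data->count);		/* 3rd: I saw the gate close */
	local_irq_restore(flags);

The master waits for data.count to drain to zero between each step, so all CPUs move through the three phases in lockstep.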
+ * The CPU vendors may each do it differently, + * so we call mtrr_if->set() callback and let them take care of it. + * When they're done, they again decrement data->count and wait for data.gate + * to be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag * Everyone then enables interrupts and we all continue on. * * Note that the mechanism is the same for UP systems, too; all the SMP stuff * becomes nops. */ -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void +set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { struct set_mtrr_data data; unsigned long flags; @@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base, data.smp_size = size; data.smp_type = type; atomic_set(&data.count, num_booting_cpus() - 1); - /* make sure data.count is visible before unleashing other CPUs */ + + /* Make sure data.count is visible before unleashing other CPUs */ smp_wmb(); - atomic_set(&data.gate,0); + atomic_set(&data.gate, 0); - /* Start the ball rolling on other CPUs */ + /* Start the ball rolling on other CPUs */ if (smp_call_function(ipi_handler, &data, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); local_irq_save(flags); - while(atomic_read(&data.count)) + while (atomic_read(&data.count)) cpu_relax(); - /* ok, reset count and toggle gate */ + /* Ok, reset count and toggle gate */ atomic_set(&data.count, num_booting_cpus() - 1); smp_wmb(); - atomic_set(&data.gate,1); + atomic_set(&data.gate, 1); - /* do our MTRR business */ + /* Do our MTRR business */ - /* HACK! + /* + * HACK! * We use this same function to initialize the mtrrs on boot. * The state of the boot cpu's mtrrs has been saved, and we want - * to replicate across all the APs. + * to replicate across all the APs. * If we're doing that @reg is set to something special... */ - if (reg != ~0U) - mtrr_if->set(reg,base,size,type); + if (reg != ~0U) + mtrr_if->set(reg, base, size, type); - /* wait for the others */ - while(atomic_read(&data.count)) + /* Wait for the others */ + while (atomic_read(&data.count)) cpu_relax(); atomic_set(&data.count, num_booting_cpus() - 1); smp_wmb(); - atomic_set(&data.gate,0); + atomic_set(&data.gate, 0); /* * Wait here for everyone to have seen the gate change * So we're the last ones to touch 'data' */ - while(atomic_read(&data.count)) + while (atomic_read(&data.count)) cpu_relax(); local_irq_restore(flags); } /** - * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (in units of 4 kB!) - * @size: Physical size of region in pages (4 kB) - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region + * mtrr_add_page - Add a memory type region + * @base: Physical base address of region in pages (in units of 4 kB!) + * @size: Physical size of region in pages (4 kB) + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. + * Memory type region registers control the caching on newer Intel and + * non Intel processors. 
This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. * - * The available types are + * The available types are * - * %MTRR_TYPE_UNCACHABLE - No caching + * %MTRR_TYPE_UNCACHABLE - No caching * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. 
*/ - -int mtrr_add_page(unsigned long base, unsigned long size, +int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment) { + unsigned long lbase, lsize; int i, replace, error; mtrr_type ltype; - unsigned long lbase, lsize; if (!mtrr_if) return -ENXIO; - - if ((error = mtrr_if->validate_add_page(base,size,type))) + + error = mtrr_if->validate_add_page(base, size, type); + if (error) return error; if (type >= MTRR_NUM_TYPES) { - printk(KERN_WARNING "mtrr: type: %u invalid\n", type); + pr_warning("mtrr: type: %u invalid\n", type); return -EINVAL; } - /* If the type is WC, check that this processor supports it */ + /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - printk(KERN_WARNING - "mtrr: your processor doesn't support write-combining\n"); + pr_warning("mtrr: your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - printk(KERN_WARNING "mtrr: zero sized request\n"); + pr_warning("mtrr: zero sized request\n"); return -EINVAL; } if (base & size_or_mask || size & size_or_mask) { - printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + pr_warning("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ get_online_cpus(); - /* Search for existing MTRR */ + + /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); - if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + if (!lsize || base > lbase + lsize - 1 || + base + size - 1 < lbase) continue; - /* At this point we know there is some kind of overlap/enclosure */ + /* + * At this point we know there is some kind of + * overlap/enclosure + */ if (base < lbase || base + size - 1 > lbase + lsize - 1) { - if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + if (base <= lbase && + base + size - 1 >= lbase + lsize - 1) { /* New region encloses an existing region */ if (type == ltype) { replace = replace == -1 ? 
i : -2; continue; - } - else if (types_compatible(type, ltype)) + } else if (types_compatible(type, ltype)) continue; } - printk(KERN_WARNING - "mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, - lsize); + pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" + " 0x%lx000,0x%lx000\n", base, size, lbase, + lsize); goto out; } - /* New region is enclosed by an existing region */ + /* New region is enclosed by an existing region */ if (ltype != type) { if (types_compatible(type, ltype)) continue; - printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", - base, size, mtrr_attrib_to_str(ltype), - mtrr_attrib_to_str(type)); + pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + base, size, mtrr_attrib_to_str(ltype), + mtrr_attrib_to_str(type)); goto out; } if (increment) @@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; goto out; } - /* Search for an empty MTRR */ + /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { set_mtrr(i, base, size, type); @@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size, mtrr_usage_table[replace] = 0; } } - } else - printk(KERN_INFO "mtrr: no more MTRRs available\n"); + } else { + pr_info("mtrr: no more MTRRs available\n"); + } error = i; out: mutex_unlock(&mtrr_mutex); @@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING - "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG - "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); + pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; } @@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size) } /** - * mtrr_add - Add a memory type region - * @base: Physical base address of region - * @size: Physical size of region - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region + * mtrr_add - Add a memory type region + * @base: Physical base address of region + * @size: Physical size of region + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. 
+ * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. * - * The available types are + * The available types are * - * %MTRR_TYPE_UNCACHABLE - No caching + * %MTRR_TYPE_UNCACHABLE - No caching * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. */ - -int -mtrr_add(unsigned long base, unsigned long size, unsigned int type, - bool increment) +int mtrr_add(unsigned long base, unsigned long size, unsigned int type, + bool increment) { if (mtrr_check(base, size)) return -EINVAL; return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } +EXPORT_SYMBOL(mtrr_add); /** - * mtrr_del_page - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region + * mtrr_del_page - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. + * If register is supplied then base and size are ignored. This is + * how drivers should call it. * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. 
*/ - int mtrr_del_page(int reg, unsigned long base, unsigned long size) { int i, max; @@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg < 0) { - printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, - size); + pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", + base, size); goto out; } } if (reg >= max) { - printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + pr_warning("mtrr: register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, <ype); if (lsize < 1) { - printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + pr_warning("mtrr: MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); + pr_warning("mtrr: reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) @@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) put_online_cpus(); return error; } + /** - * mtrr_del - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region + * mtrr_del - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. + * If register is supplied then base and size are ignored. This is + * how drivers should call it. * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. */ - -int -mtrr_del(int reg, unsigned long base, unsigned long size) +int mtrr_del(int reg, unsigned long base, unsigned long size) { if (mtrr_check(base, size)) return -EINVAL; return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } - -EXPORT_SYMBOL(mtrr_add); EXPORT_SYMBOL(mtrr_del); -/* HACK ALERT! +/* + * HACK ALERT! * These should be called implicitly, but we can't yet until all the initcall * stuff is done... */ @@ -576,29 +593,28 @@ struct mtrr_value { static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; -static int mtrr_save(struct sys_device * sysdev, pm_message_t state) +static int mtrr_save(struct sys_device *sysdev, pm_message_t state) { int i; for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, - &mtrr_value[i].lbase, - &mtrr_value[i].lsize, - &mtrr_value[i].ltype); + mtrr_if->get(i, &mtrr_value[i].lbase, + &mtrr_value[i].lsize, + &mtrr_value[i].ltype); } return 0; } -static int mtrr_restore(struct sys_device * sysdev) +static int mtrr_restore(struct sys_device *sysdev) { int i; for (i = 0; i < num_var_ranges; i++) { - if (mtrr_value[i].lsize) - set_mtrr(i, - mtrr_value[i].lbase, - mtrr_value[i].lsize, - mtrr_value[i].ltype); + if (mtrr_value[i].lsize) { + set_mtrr(i, mtrr_value[i].lbase, + mtrr_value[i].lsize, + mtrr_value[i].ltype); + } } return 0; } @@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup; /** * mtrr_bp_init - initialize mtrrs on the boot CPU * - * This needs to be called early; before any of the other CPUs are + * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). 
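The mtrr_add()/mtrr_del() kernel-doc above reduces to a simple driver-side pattern; a sketch with a hypothetical framebuffer and addresses:

	/* Map 16 MB of video memory write-combining; the size must be a
	 * power of two on an equally aligned physical base. */
	int mtrr_reg = mtrr_add(0xf8000000UL, 0x1000000UL,
				MTRR_TYPE_WRCOMB, true);

	if (mtrr_reg < 0)
		pr_warning("fb: no write-combining, running uncached\n");

	/* ... at teardown, the return value acts as an opaque cookie: */
	if (mtrr_reg >= 0)
		mtrr_del(mtrr_reg, 0xf8000000UL, 0x1000000UL);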
- * + * */ void __init mtrr_bp_init(void) { u32 phys_addr; + init_ifs(); phys_addr = 32; if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ + size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; phys_addr = 36; - /* This is an AMD specific MSR, but we assume(hope?) that - Intel will implement it to when they extend the address - bus of the Xeon. */ + /* + * This is an AMD specific MSR, but we assume(hope?) that + * Intel will implement it to when they extend the address + * bus of the Xeon. + */ if (cpuid_eax(0x80000000) >= 0x80000008) { phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ @@ -649,9 +668,11 @@ void __init mtrr_bp_init(void) size_and_mask = ~size_or_mask & 0xfffff00000ULL; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && boot_cpu_data.x86 == 6) { - /* VIA C* family have Intel style MTRRs, but - don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ + /* + * VIA C* family have Intel style MTRRs, + * but don't support PAE + */ + size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; phys_addr = 32; } @@ -694,7 +715,6 @@ void __init mtrr_bp_init(void) changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); } - } } } @@ -706,12 +726,17 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, - * but this routine will be called in cpu boot time, holding the lock - * breaks it. This routine is called in two cases: 1.very earily time - * of software resume, when there absolutely isn't mtrr entry changes; - * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to - * prevent mtrr entry changes + * Ideally we should hold mtrr_mutex here to avoid mtrr entries + * changed, but this routine will be called in cpu boot time, + * holding the lock breaks it. + * + * This routine is called in two cases: + * + * 1. very earily time of software resume, when there absolutely + * isn't mtrr entry changes; + * + * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug + * lock to prevent mtrr entry changes */ local_irq_save(flags); @@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; + if (use_intel()) { if (!changed_by_mtrr_cleanup) mtrr_state_warn(); - } else { - /* The CPUs haven't MTRR and seem to not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * TBD: is there any system with such CPU which supports - * suspend/resume? if no, we should remove the code. - */ - sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); + return 0; } + + /* + * The CPU has no MTRR and seems to not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * + * TBD: is there any system with such CPU which supports + * suspend/resume? If no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); + return 0; } subsys_initcall(mtrr_init_finialize); -- cgit v1.2.3 From e3d0e69268dffb9676bf0800a60fb3573a723480 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Jul 2009 09:44:11 +0200 Subject: x86: Further clean up of mtrr/generic.c Yinghai noticed that i defined BIOS_BUG_MSG but added no usage for it. 
The usage is to clean up this turd in generic.c: printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " "contains strange UC entry under 1M, check " "with your system vendor!\n", i); Breaking printk lines in the middle looks ugly, is hard to read and breaks 'git grep'. Use the BIOS_BUG_MSG instead. Also complete the moving of structure definitions and variables to the top of the file. Reported-by: Yinghai Lu LKML-Reference: <20090703164225.GA21447@elte.hu> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 56 ++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index b8aba811b60..315738c74aa 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -34,14 +34,37 @@ #include "mtrr.h" -/* Should be related to MTRR_VAR_RANGES nums */ -#define RANGE_NUM 256 - struct res_range { unsigned long start; unsigned long end; }; +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_state { + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; +}; + +/* Should be related to MTRR_VAR_RANGES nums */ +#define RANGE_NUM 256 + +static struct res_range __initdata range[RANGE_NUM]; +static int __initdata nr_range; + +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + +static int __initdata debug_print; +#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) + + static int __init add_range(struct res_range *range, int nr_range, unsigned long start, unsigned long end) @@ -147,18 +170,6 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } -struct var_mtrr_range_state { - unsigned long base_pfn; - unsigned long size_pfn; - mtrr_type type; -}; - -static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; - -static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) - - #define BIOS_BUG_MSG KERN_WARNING \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -200,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && (mtrr_state.enabled & 1)) { /* Var MTRR contains UC entry below 1M? 
Skip it: */ - printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " - "contains strange UC entry under 1M, check " - "with your system vendor!\n", i); + printk(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) continue; size -= (1<<(20-PAGE_SHIFT)) - base; @@ -244,9 +253,6 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, return nr_range; } -static struct res_range __initdata range[RANGE_NUM]; -static int __initdata nr_range; - #ifdef CONFIG_MTRR_SANITIZER static unsigned long __init sum_ranges(struct res_range *range, int nr_range) @@ -284,14 +290,6 @@ static int __init mtrr_cleanup_debug_setup(char *str) } early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); -struct var_mtrr_state { - unsigned long range_startk; - unsigned long range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; -}; - static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type, unsigned int address_bits) -- cgit v1.2.3 From 6ed106549d17474ca17a16057f4c0ed4eba5a7ca Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Jun 2009 06:03:08 +0000 Subject: net: use NETDEV_TX_OK instead of 0 in ndo_start_xmit() functions This patch is the result of an automatic spatch transformation to convert all ndo_start_xmit() return values of 0 to NETDEV_TX_OK. Some occurrences are missed by the automatic conversion; those will be handled in a separate patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- arch/ia64/hp/sim/simeth.c | 2 +- arch/um/drivers/net_kern.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index e4d8fde6810..7e81966ce48 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -412,7 +412,7 @@ simeth_tx(struct sk_buff *skb, struct net_device *dev) */ dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } static inline struct sk_buff * diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 3b44b47c7e1..4c75409bc09 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -245,7 +245,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } static void uml_net_set_multicast_list(struct net_device *dev) -- cgit v1.2.3 From ec634fe328182a1a098585bfc7b69e5042bdb08d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Jul 2009 19:23:38 -0700 Subject: net: convert remaining non-symbolic return values in ndo_start_xmit() functions This patch converts the remaining occurrences of raw return values to their symbolic counterparts in ndo_start_xmit() functions that were missed by the previous automatic conversion. Additionally code that assumed the symbolic value of NETDEV_TX_OK to be zero is changed to explicitly use NETDEV_TX_OK. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- arch/xtensa/platforms/iss/network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index edad4156d89..2f0b86b37cf 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -545,7 +545,7 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } -- cgit v1.2.3 From 9ff80942992cd5abd0779c815f310f65b7b83860 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 8 Jul 2009 22:03:53 +0400 Subject: x86: Clean up idt_descr and idt_table by using NR_VECTORS instead of a hardcoded number Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090708180353.GH5301@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/traps.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9..d6f27c92854 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -982,7 +982,7 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; +struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332f475..7e4b1f5dec8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -76,7 +76,7 @@ char ignore_fpu_irq; * F0 0F bug workaround.. We have a special link segment * for this. */ -gate_desc idt_table[256] +gate_desc idt_table[NR_VECTORS] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif -- cgit v1.2.3 From a1b4f1a5b7f57be2593a9f1fca465a529c95fc07 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 5 Jul 2009 20:01:54 +0400 Subject: x86, ipi: Clean up safe_smp_processor_id() by using the cpu_has_apic macro helper We already use a lot of cpu_has_ helpers. Let's do the same here for consistency.
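For reference, the cpu_has_* helpers are thin wrappers around the feature-bit test; in the cpufeature.h of this era they are defined roughly as:

#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
#define cpu_has_mtrr		boot_cpu_has(X86_FEATURE_MTRR)
#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)

so the conversion below is purely cosmetic.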
Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090705160154.GB4791@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/ipi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445727a..e6b4f517fcf 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -150,7 +150,7 @@ int safe_smp_processor_id(void) { int apicid, cpuid; - if (!boot_cpu_has(X86_FEATURE_APIC)) + if (!cpu_has_apic) return 0; apicid = hard_smp_processor_id(); -- cgit v1.2.3 From e90476d3bab4322070c0afb3e3b55671de8664ea Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sat, 11 Jul 2009 09:32:46 +0800 Subject: x86: Remove duplicated #include Remove duplicated #include in: arch/x86/kernel/dumpstack.c Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c8405718a4c..2d8a371d433 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -15,7 +15,6 @@ #include #include #include -#include #include -- cgit v1.2.3 From 8bdbd962ecfcbdd96f9dbb02d780b4553afd2543 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 4 Jul 2009 00:35:45 +0100 Subject: x86/cpu: Clean up various files a bit No code changes except printk levels (although some of the K6 mtrr code might be clearer if there were a few as would splitting out some of the intel cache code). Signed-off-by: Alan Cox LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 37 ++++++----- arch/x86/kernel/cpu/bugs.c | 10 +-- arch/x86/kernel/cpu/bugs_64.c | 2 +- arch/x86/kernel/cpu/common.c | 8 +-- arch/x86/kernel/cpu/cyrix.c | 19 ++++-- arch/x86/kernel/cpu/hypervisor.c | 5 +- arch/x86/kernel/cpu/intel.c | 11 ++-- arch/x86/kernel/cpu/intel_cacheinfo.c | 116 +++++++++++++++++---------------- arch/x86/kernel/cpu/perfctr-watchdog.c | 45 ++++++------- arch/x86/kernel/cpu/proc.c | 2 +- arch/x86/kernel/cpu/vmware.c | 18 ++--- 11 files changed, 144 insertions(+), 129 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28e5f595604..c6eb02e6987 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) #define CBAR_ENB (0x80000000) #define CBAR_KEY (0X000000CB) if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); + if (inl(CBAR) & CBAR_ENB) + outl(0 | CBAR_KEY, CBAR); } } @@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) d = d2-d; if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); + printk(KERN_CONT + "system stability may be impaired when more than 32 MB are used.\n"); else - printk("probably OK (after B9730xxxx).\n"); + printk(KERN_CONT "probably OK (after B9730xxxx).\n"); printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } @@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); + printk(KERN_INFO + "CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -398,7 +400,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) u32 level; level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) @@ -487,27 +489,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < + ((tseg>>PMD_SHIFT) < (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); } } #endif } #ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, + unsigned int size) { /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + /* Duron Rev A0 */ + if (c->x86_model == 3 && c->x86_mask == 0) size = 64; + /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + (c->x86_mask == 0 || c->x86_mask == 1)) size = 256; } return size; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c8e315f1aa8..01a26521239 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -81,7 +81,7 @@ static void __init check_fpu(void) boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); + printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); } static void __init check_hlt(void) @@ -98,7 +98,7 @@ static void __init check_hlt(void) halt(); halt(); halt(); - printk("OK.\n"); + printk(KERN_CONT "OK.\n"); } /* @@ -122,9 +122,9 @@ static void __init check_popad(void) * CPU hard. Too bad. 
*/ if (res != 12345678) - printk("Buggy.\n"); + printk(KERN_CONT "Buggy.\n"); else - printk("OK.\n"); + printk(KERN_CONT "OK.\n"); #endif } @@ -156,7 +156,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #ifndef CONFIG_SMP - printk("CPU: "); + printk(KERN_INFO "CPU: "); print_cpu_info(&boot_cpu_data); #endif check_config(); diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4..04f0fe5af83 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -15,7 +15,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #if !defined(CONFIG_SMP) - printk("CPU: "); + printk(KERN_INFO "CPU: "); print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d6f27c92854..c96ea44928b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -28,13 +28,13 @@ #include #include #include -#include +#include #include #include #include #include #include -#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 593171e967e..19807b89f05 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -3,10 +3,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include @@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * The 5510/5520 companion chips have a funky PIT. */ if (vendor == PCI_VENDOR_ID_CYRIX && - (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) + (device == PCI_DEVICE_ID_CYRIX_5510 || + device == PCI_DEVICE_ID_CYRIX_5520)) mark_tsc_unstable("cyrix 5510/5520 detected"); } #endif @@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) * ? 
: 0x7x * GX1 : 0x8x GX1 datasheet 56 */ - if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) + if ((0x30 <= dir1 && dir1 <= 0x6f) || + (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); return; } else { /* MediaGX */ @@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); + /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); + /* disable MAPEN */ + setCx86(CX86_CCR3, ccr3); local_irq_restore(flags); } } diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index fb5b86af0b0..93ba8eeb100 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -28,11 +28,10 @@ static inline void __cpuinit detect_hypervisor_vendor(struct cpuinfo_x86 *c) { - if (vmware_platform()) { + if (vmware_platform()) c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; - } else { + else c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; - } } unsigned long get_hypervisor_tsc_freq(void) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3260ab04499..80a722a071b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,17 +7,17 @@ #include #include #include +#include #include #include #include -#include #include #include #include #ifdef CONFIG_X86_64 -#include +#include #include #endif @@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_F00F_BUG /* * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. + * have the F0 0F bug, which lets nonprivileged users lock up the + * system. * Note that the workaround only should be initialized once... */ c->f00f_bug = 0; @@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); } } @@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) /* Intel has a non-standard dependency on %ecx for this CPUID level. */ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); if (eax & 0x1f) - return ((eax >> 26) + 1); + return (eax >> 26) + 1; else return 1; } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 789efe217e1..306bf0dca06 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -3,7 +3,7 @@ * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj : Work with CPU hotplug infrastructure. + * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 
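One idiom in this file is worth a gloss: the l1/l2/l3 cache unions reformatted further down overlay a bitfield struct on a raw CPUID register, so the whole register is assigned once and then picked apart by field name. A sketch of the pattern (field layout as in AMD's CPUID 0x80000005 ECX for the L1 data cache):

union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l1_cache l1;

l1.val = cpuid_ecx(0x80000005);		/* one 32-bit read */
pr_info("L1D: %uK, %u-way, %u-byte lines\n",
	l1.size_in_kb, l1.assoc, l1.line_size);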
*/ @@ -16,7 +16,7 @@ #include #include -#include +#include #include #define LVL_1_INST 1 @@ -25,14 +25,15 @@ #define LVL_3 4 #define LVL_TRACE 5 -struct _cache_table -{ +struct _cache_table { unsigned char descriptor; char cache_type; short size; }; -/* all the cache descriptor types we care about (no TLB or trace cache entries) */ +/* All the cache descriptor types we care about (no TLB or + trace cache entries) */ + static const struct _cache_table __cpuinitconst cache_table[] = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ @@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = }; -enum _cache_type -{ +enum _cache_type { CACHE_TYPE_NULL = 0, CACHE_TYPE_DATA = 1, CACHE_TYPE_INST = 2, @@ -170,31 +170,31 @@ unsigned short num_cache_leaves; Maybe later */ union l1_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 8; - unsigned assoc : 8; - unsigned size_in_kb : 8; + unsigned line_size:8; + unsigned lines_per_tag:8; + unsigned assoc:8; + unsigned size_in_kb:8; }; unsigned val; }; union l2_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned size_in_kb : 16; + unsigned line_size:8; + unsigned lines_per_tag:4; + unsigned assoc:4; + unsigned size_in_kb:16; }; unsigned val; }; union l3_cache { struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned res : 2; - unsigned size_encoded : 14; + unsigned line_size:8; + unsigned lines_per_tag:4; + unsigned assoc:4; + unsigned res:2; + unsigned size_encoded:14; }; unsigned val; }; @@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void) unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + /* Cache sizes */ + unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; @@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { - switch(this_leaf.eax.split.level) { - case 1: + switch (this_leaf.eax.split.level) { + case 1: if (this_leaf.eax.split.type == CACHE_TYPE_DATA) new_l1d = this_leaf.size/1024; @@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) CACHE_TYPE_INST) new_l1i = this_leaf.size/1024; break; - case 2: + case 2: new_l2 = this_leaf.size/1024; num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l2_id = c->apicid >> index_msb; break; - case 3: + case 3: new_l3 = this_leaf.size/1024; num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); + index_msb = get_count_order( + num_threads_sharing); l3_id = c->apicid >> index_msb; break; - default: + default: break; } } @@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; - for ( i = 0 ; i < n ; i++ ) { + for (i = 0 ; i < n ; i++) { cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if (regs[j] & (1 << 31)) regs[j] = 0; - } + for (j = 0 ; j < 3 ; j++) + if (regs[j] & (1 << 31)) + regs[j] = 0; /* Byte 0 is level 
count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { + for (j = 1 ; j < 16 ; j++) { unsigned char des = dp[j]; unsigned char k = 0; /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) - { + while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { if (only_trace && cache_table[k].cache_type != LVL_TRACE) break; @@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } if (trace) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if (l1i) + printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); if (l1d) - printk(", L1 D cache: %dK\n", l1d); + printk(KERN_CONT ", L1 D cache: %dK\n", l1d); else - printk("\n"); + printk(KERN_CONT "\n"); if (l2) printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); @@ -558,8 +559,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) } } #else -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ +} + +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ +} #endif static void __cpuinit free_cache_attributes(unsigned int cpu) @@ -645,7 +651,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); static ssize_t show_##file_name \ (struct _cpuid4_info *this_leaf, char *buf) \ { \ - return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ + return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ } show_one_plus(level, eax.split.level, 0); @@ -656,7 +662,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) { - return sprintf (buf, "%luK\n", this_leaf->size / 1024); + return sprintf(buf, "%luK\n", this_leaf->size / 1024); } static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, @@ -669,7 +675,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, const struct cpumask *mask; mask = to_cpumask(this_leaf->shared_cpu_map); - n = type? + n = type ? 
cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; @@ -800,7 +806,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); -static struct attribute * default_attrs[] = { +static struct attribute *default_attrs[] = { &type.attr, &level.attr, &coherency_line_size.attr, @@ -815,7 +821,7 @@ static struct attribute * default_attrs[] = { NULL }; -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct _cache_attr *fattr = to_attr(attr); struct _index_kobject *this_leaf = to_object(kobj); @@ -828,8 +834,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) return ret; } -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { struct _cache_attr *fattr = to_attr(attr); struct _index_kobject *this_leaf = to_object(kobj); @@ -883,7 +889,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) goto err_out; per_cpu(index_kobject, cpu) = kzalloc( - sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); + sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); if (unlikely(per_cpu(index_kobject, cpu) == NULL)) goto err_out; @@ -917,7 +923,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu,i); + this_object = INDEX_KOBJECT_PTR(cpu, i); this_object->cpu = cpu; this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), @@ -925,9 +931,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) per_cpu(cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { - for (j = 0; j < i; j++) { - kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); - } + for (j = 0; j < i; j++) + kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); return retval; @@ -952,7 +957,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } @@ -977,8 +982,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = -{ +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 5c481f6205b..8100a29c854 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) /* returns the bit offset of the performance counter register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - return (msr - MSR_K7_PERFCTR0); + return msr - MSR_K7_PERFCTR0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_PERFCTR0); + return msr - MSR_ARCH_PERFMON_PERFCTR0; 
switch (boot_cpu_data.x86) { case 6: - return (msr - MSR_P6_PERFCTR0); + return msr - MSR_P6_PERFCTR0; case 15: - return (msr - MSR_P4_BPU_PERFCTR0); + return msr - MSR_P4_BPU_PERFCTR0; } } return 0; @@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) /* returns the bit offset of the event selection register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - return (msr - MSR_K7_EVNTSEL0); + return msr - MSR_K7_EVNTSEL0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + return msr - MSR_ARCH_PERFMON_EVENTSEL0; switch (boot_cpu_data.x86) { case 6: - return (msr - MSR_P6_EVNTSEL0); + return msr - MSR_P6_EVNTSEL0; case 15: - return (msr - MSR_P4_BSU_ESCR0); + return msr - MSR_P4_BSU_ESCR0; } } return 0; @@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) { BUG_ON(counter > NMI_MAX_COUNTER_BITS); - return (!test_bit(counter, perfctr_nmi_owner)); + return !test_bit(counter, perfctr_nmi_owner); } /* checks the an msr for availability */ @@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) counter = nmi_perfctr_msr_to_bit(msr); BUG_ON(counter > NMI_MAX_COUNTER_BITS); - return (!test_bit(counter, perfctr_nmi_owner)); + return !test_bit(counter, perfctr_nmi_owner); } EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); @@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) */ counter_val = (u64)cpu_khz * 1000; do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { + if (counter_val > 0x7fffffffULL) { u64 count = (u64)cpu_khz * 1000; do_div(count, 0x7fffffffUL); retval = count + 1; @@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); - if(descr) + if (descr) pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsrl(perfctr_msr, 0 - count); } @@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); - if(descr) + if (descr) pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsr(perfctr_msr, (u32)(-count), 0); } @@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; @@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; @@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) apic_write(APIC_LVTPC, APIC_DM_NMI); /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); + write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); } static const struct wd_ops p6_wd_ops = { @@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz) if (smp_num_siblings == 2) { unsigned int ebx, apicid; - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; } else #endif ht_num = 0; @@ -544,7 +544,7 @@ static int 
setup_p4_watchdog(unsigned nmi_hz) } evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS + | P4_ESCR_OS | P4_ESCR_USR; cccr_val |= P4_CCCR_THRESHOLD(15) @@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { unsigned dummy; /* - * P4 quirks: + * P4 quirks: * - An overflown perfctr will assert its interrupt * until the OVF flag in its CCCR is cleared. * - LVTPC is masked on interrupt and must be @@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) * NOTE: Corresponding bit = 0 in ebx indicates event present. */ cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + if ((eax.split.mask_length < + (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) return 0; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e30397246..1e904346bbf 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -128,7 +128,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (i < ARRAY_SIZE(x86_power_flags) && x86_power_flags[i]) seq_printf(m, "%s%s", - x86_power_flags[i][0]?" ":"", + x86_power_flags[i][0] ? " " : "", x86_power_flags[i]); else seq_printf(m, " [%d]", i); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 284c399e323..bc24f514ec9 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -49,17 +49,17 @@ static inline int __vmware_platform(void) static unsigned long __vmware_get_tsc_khz(void) { - uint64_t tsc_hz; - uint32_t eax, ebx, ecx, edx; + uint64_t tsc_hz; + uint32_t eax, ebx, ecx, edx; - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); + VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - if (ebx == UINT_MAX) - return 0; - tsc_hz = eax | (((uint64_t)ebx) << 32); - do_div(tsc_hz, 1000); - BUG_ON(tsc_hz >> 32); - return tsc_hz; + if (ebx == UINT_MAX) + return 0; + tsc_hz = eax | (((uint64_t)ebx) << 32); + do_div(tsc_hz, 1000); + BUG_ON(tsc_hz >> 32); + return tsc_hz; } /* -- cgit v1.2.3 From 406f104b4172de7452702c6810807c1b0132ba22 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Jul 2009 13:18:26 +0800 Subject: crypto: sha1-s390 - Add export/import support This patch adds export/import support to sha1-s390. The exported type is defined by struct sha1_state, which is basically the entire descriptor state of sha1_generic. 
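To see what this buys a caller, here is a minimal sketch of driving the new export/import pair through the shash API (illustrative only, not part of the patch; buf1/buf2 and their lengths are hypothetical, and error handling is omitted):

	struct crypto_shash *tfm = crypto_alloc_shash("sha1", 0, 0);
	struct shash_desc *desc;
	struct sha1_state state;
	u8 digest[SHA1_DIGEST_SIZE];

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	desc->tfm = tfm;

	crypto_shash_init(desc);
	crypto_shash_update(desc, buf1, len1);
	crypto_shash_export(desc, &state);	/* freeze the partial state */
	/* ... arbitrarily later, even in a different descriptor ... */
	crypto_shash_import(desc, &state);	/* resume where we left off */
	crypto_shash_update(desc, buf2, len2);
	crypto_shash_final(desc, digest);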
Signed-off-by: Herbert Xu --- arch/s390/crypto/sha1_s390.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index e85ba348722..2c5ec7969e3 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -46,12 +46,38 @@ static int sha1_init(struct shash_desc *desc) return 0; } +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); + return 0; +} + +static int sha1_import(struct shash_desc *desc, const u8 *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); + sctx->func = KIMD_SHA_1; + return 0; +} + static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha1_export, + .import = sha1_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-s390", -- cgit v1.2.3 From f63559bef380a95093408691c1081f07da755b74 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Jul 2009 13:20:32 +0800 Subject: crypto: sha256-s390 - Add export/import support This patch adds export/import support to sha256-s390. The exported type is defined by struct sha256_state, which is basically the entire descriptor state of sha256_generic. Signed-off-by: Herbert Xu --- arch/s390/crypto/sha256_s390.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index f9fefc56963..943a669452c 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -42,12 +42,38 @@ static int sha256_init(struct shash_desc *desc) return 0; } +static int sha256_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha256_import(struct shash_desc *desc, const u8 *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_256; + return 0; +} + static struct shash_alg alg = { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", .cra_driver_name= "sha256-s390", -- cgit v1.2.3 From 8045a4c293d36c61656a20d581b11f7f0cd7acd5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Jul 2009 19:30:25 +0200 Subject: x86/oprofile: Fix cast of counter value When casting the counter value to a 64 bit value in 32 bit mode, sign extension may lead to broken counter values. This patch fixes this by casting to (u64) instead of (s64). 
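The underlying pitfall is the ordering of widening and negation. A user-space illustration with hypothetical values (not the kernel code): the value must be widened to 64 bits before it is negated, and an unsigned cast keeps the negation out of signed arithmetic entirely:

	u32 count = 3;

	u64 a = (u64)-count;	/* negate in 32 bits, then zero-extend:
				 * 0x00000000fffffffd -- only the low half
				 * of the intended reload value */
	u64 b = -(u64)count;	/* widen first, then negate:
				 * 0xfffffffffffffffd -- the full-width
				 * two's complement that wrmsrl() expects */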
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 4 ++-- arch/x86/oprofile/op_model_p4.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index e95268eb922..7ca8306aefa 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -111,7 +111,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; wrmsrl(msrs->counters[i].addr, - -(s64)counter_config[i].count); + -(u64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); @@ -237,7 +237,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); } op_amd_handle_ibs(regs, msrs); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index f01e53b118f..9db9e361182 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -580,7 +580,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, - -(s64)counter_config[i].count); + -(u64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -625,11 +625,11 @@ static int p4_check_ctrs(struct pt_regs * const regs, if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); wrmsrl(p4_counters[real].counter_address, - -(s64)reset_value[i]); + -(u64)reset_value[i]); CCCR_CLEAR_OVF(low); wrmsr(p4_counters[real].cccr_address, low, high); wrmsrl(p4_counters[real].counter_address, - -(s64)reset_value[i]); + -(u64)reset_value[i]); } } -- cgit v1.2.3 From 44ab9a6b0e909145d42615493952fe986b1ce5c2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 18:33:02 +0200 Subject: x86/oprofile: Rework and simplify nmi_cpu_setup() This patch removes the function nmi_save_registers(). Per-cpu code is now executed only in the function nmi_cpu_setup(). Also, it renames the per-cpu function nmi_restore_registers() to nmi_cpu_restore_registers(). 
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 93df76dd60f..25da1e17815 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -87,13 +87,6 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } -static void nmi_save_registers(void *dummy) -{ - int cpu = smp_processor_id(); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - nmi_cpu_save_registers(msrs); -} - static void free_msrs(void) { int i; @@ -137,6 +130,7 @@ static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); spin_unlock(&oprofilefs_lock); @@ -182,13 +176,12 @@ static int nmi_setup(void) } } - on_each_cpu(nmi_save_registers, NULL, 1); on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } -static void nmi_restore_registers(struct op_msrs *msrs) +static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; @@ -220,7 +213,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); + nmi_cpu_restore_registers(msrs); } static void nmi_shutdown(void) -- cgit v1.2.3 From 6e63ea4b0b14ff5fb8a3ca704fcda7d28b95f079 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 7 Jul 2009 19:25:39 +0200 Subject: x86/oprofile: Whitespace changes only This patch fixes the whitespace of code that will be touched in follow-on patches.
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 ++++++------ arch/x86/oprofile/op_model_amd.c | 12 ++++++------ arch/x86/oprofile/op_model_p4.c | 8 ++++---- arch/x86/oprofile/op_model_ppro.c | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 25da1e17815..fca8dc94531 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -516,12 +516,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 7ca8306aefa..f676f8825a3 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -91,7 +91,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, int i; /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); @@ -229,7 +229,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); @@ -250,7 +250,7 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (reset_value[i]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -270,7 +270,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) * Subtle: stop on all counters to avoid race with setting our * pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->controls[i].addr, val); @@ -285,11 +285,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9db9e361182..5921b7fc724 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -558,7 +558,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, } /* clear the cccrs we will use */ - for (i = 0 ; i < num_counters ; i++) { + for (i = 0; i < num_counters; i++) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); @@ -575,7 +575,7 @@ static void p4_setup_ctrs(struct op_x86_model_spec const *model, } /* setup all counters */ - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; 
pmc_setup_one_p4_counter(i); @@ -678,7 +678,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } @@ -687,7 +687,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) * conjunction with the counter registers (hence the starting offset). * This saves a few bits. */ - for (i = num_counters ; i < num_controls ; ++i) { + for (i = num_counters; i < num_controls; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index cd72d5c73b4..570d717c330 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -81,7 +81,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, } /* clear all counters */ - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (unlikely(!msrs->controls[i].addr)) continue; rdmsrl(msrs->controls[i].addr, val); @@ -125,7 +125,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, if (unlikely(!reset_value)) goto out; - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); @@ -188,11 +188,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->counters[i].addr) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } - for (i = 0 ; i < num_counters ; ++i) { + for (i = 0; i < num_counters; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } -- cgit v1.2.3 From 6b2b171a774af256082635b53ac387b1613b7b4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 02:53:50 -0700 Subject: x86/acpi: acpi_parse_madt_ioapic_entries: remove redundant braces We don't put braces around a single statement. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/acpi/boot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a0285..ce31c1af854 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1179,9 +1179,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) * If MPS is present, it will handle them, * otherwise the system will stay in PIC mode */ - if (acpi_disabled || acpi_noirq) { + if (acpi_disabled || acpi_noirq) return -ENODEV; - } if (!cpu_has_apic) return -ENODEV; -- cgit v1.2.3 From 2f210deba9887dd9143b63b217506f1ac152e91c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 02:55:22 -0700 Subject: x86/ioapic.c: ioapic_modify_irq is too large to inline If ioapic_modify_irq() is marked inline, it gets inlined several times. Un-inlining it saves around 200 bytes in .text for me. 
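Such savings are easy to verify from two builds (a sketch; vmlinux.before and vmlinux.after are hypothetical file names):

	$ size vmlinux.before vmlinux.after
	$ ./scripts/bloat-o-meter vmlinux.before vmlinux.after

bloat-o-meter ships with the kernel tree and prints the per-symbol size delta, which makes it obvious how many copies of a formerly-inlined function were collapsed into one.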
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 90b5e6efa93..82271eb87bb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -556,9 +556,9 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } -static inline void io_apic_modify_irq(struct irq_cfg *cfg, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) +static void io_apic_modify_irq(struct irq_cfg *cfg, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) { int pin; struct irq_pin_list *entry; -- cgit v1.2.3 From 890aeacf64c55a7ada7054a140d249ab13899f2d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 02:57:43 -0700 Subject: x86/ioapic.c: unify __mask_IO_APIC_irq() The main difference between the 32 and 64-bit versions of __mask_IO_APIC_irq() is that the 64-bit one does a readback from the I/O APIC to synchronize it. If there's a hardware requirement to do a readback sync after updating an APIC register, then it will be a hardware requirement regardless of whether the kernel is compiled 32 or 64-bit. Unify __mask_IO_APIC_irq() using the 64-bit version which always syncs with io_apic_sync(). Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 82271eb87bb..f8aa5461071 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -580,7 +580,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } -#ifdef CONFIG_X86_64 static void io_apic_sync(struct irq_pin_list *entry) { /* @@ -596,12 +595,8 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -#else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); -} +#ifdef CONFIG_X86_32 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, -- cgit v1.2.3 From 916a0fe739f151664f7f07b42543ae6fd4caec49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:00:22 -0700 Subject: x86/ioapic.c: remove #ifdef for 82093AA workaround While no 64-bit hardware will have a version 0x11 I/O APIC which needs the level/edge bug workaround, that's not a particular reason to use CONFIG_X86_32 to #ifdef the code out. Most 32-bit machines will no longer need the workaround either, so the test to see whether it is necessary should be more fine-grained than "32-bit=yes, 64-bit=no". (Also fix formatting of block comment.)
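One possible shape for such a fine-grained test, keyed off the I/O APIC version register instead of the kernel's word size (an untested sketch, not part of this patch; it assumes io_apic_read() access to register 0x01, whose low byte is the version):

	static bool io_apic_needs_eoi_workaround(int apic)
	{
		union IO_APIC_reg_01 reg_01;

		reg_01.raw = io_apic_read(apic, 1);
		return reg_01.bits.version == 0x11;	/* 82093AA-class parts */
	}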
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 47 +++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f8aa5461071..1a341444258 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -596,7 +596,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -#ifdef CONFIG_X86_32 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, @@ -608,7 +607,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } -#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { @@ -2510,11 +2508,8 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - -#ifdef CONFIG_X86_32 unsigned long v; int i; -#endif struct irq_cfg *cfg; int do_unmask_irq = 0; @@ -2527,31 +2522,28 @@ static void ack_apic_level(unsigned int irq) } #endif -#ifdef CONFIG_X86_32 /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. 
--macro + */ cfg = desc->chip_data; i = cfg->vector; - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); -#endif /* * We must acknowledge the irq before we move it or the acknowledge will @@ -2593,7 +2585,7 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq_desc(desc); } -#ifdef CONFIG_X86_32 + /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2601,7 +2593,6 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } -#endif } #ifdef CONFIG_INTR_REMAP -- cgit v1.2.3 From 83c21bedf63ce92a2dd82ae2c7a96179b0aa4372 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:13:04 -0700 Subject: x86/ioapic.c: remove redundant declaration of irq_pin_list The structure is defined immediately below, so there's no need to forward declare it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1a341444258..ec52e0c045c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -116,8 +116,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_pin_list; - /* * This is performance-critical, we want to do it O(1) * -- cgit v1.2.3 From 8e13d697febc1ba17e70ed88789255c8bc25aa41 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:14:59 -0700 Subject: x86/ioapic.c: move lost comment to what seems like appropriate place The comment got separated from its subject, so move it to what appears to be the right place, and update to describe the current structure. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ec52e0c045c..a097a773bc7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -116,13 +116,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - struct irq_pin_list { int apic, pin; struct irq_pin_list *next; @@ -137,6 +130,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; -- cgit v1.2.3 From d8c52063ed85dda61b70bc05b90711478db5dc17 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:17:58 -0700 Subject: x86/ioapic.c: convert io_apic_level_ack_pending loop to normal for() loop Convert the unconventional loop in io_apic_level_ack_pending() to a conventional for() loop. 
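In the abstract, the conversion looks like this (generic sketch; visit() stands in for the loop body):

	/* before: open-coded walk with two exit tests */
	entry = head;
	for (;;) {
		if (!entry)
			break;
		visit(entry);
		if (!entry->next)
			break;
		entry = entry->next;
	}

	/* after: one conventional loop header carries all the control flow */
	for (entry = head; entry != NULL; entry = entry->next)
		visit(entry);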
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a097a773bc7..0d0401802d4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -410,13 +410,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; int pin; - if (!entry) - break; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ @@ -424,9 +421,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) spin_unlock_irqrestore(&ioapic_lock, flags); return true; } - if (!entry->next) - break; - entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); -- cgit v1.2.3 From 875e68ec32fc5495f3edf987aaae1c52306184b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:24:11 -0700 Subject: x86/ioapic.c: simplify add_pin_to_irq_node() Rather than duplicating the same alloc/init code twice, restructure the function to look for duplicates and then add an entry if none is found. This function is not performance critical; all but one of its callers are __init functions, and the non-__init caller is for PCI device setup. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0d0401802d4..d9e8f19088d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -490,34 +490,22 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list *entry; + struct irq_pin_list **entryp, *entry; - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(node); - if (!entry) { - printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", - apic, pin); - return; - } - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - return; - } - - while (entry->next) { + for (entryp = &cfg->irq_2_pin; + *entryp != NULL; + entryp = &(*entryp)->next) { + entry = *entryp; /* not again, please */ if (entry->apic == apic && entry->pin == pin) return; - - entry = entry->next; } - entry->next = get_one_free_irq_2_pin(node); - entry = entry->next; + entry = get_one_free_irq_2_pin(node); entry->apic = apic; entry->pin = pin; + + *entryp = entry; } /* -- cgit v1.2.3 From 535b64291a9d1ff8bc54642494a5fce27e1e1170 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:29:26 -0700 Subject: x86/ioapic.c: convert replace_pin_at_irq_node to conventional for() loop Use a conventional for() loop in replace_pin_at_irq_node(). 
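The restructured function is an instance of the pointer-to-pointer list idiom; in generic form (a sketch with hypothetical names, including the new_node() allocator):

	static void add_unique(struct node **headp, int key)
	{
		struct node **pp, *entry;

		/* walk the chain through the link fields themselves */
		for (pp = headp; *pp != NULL; pp = &(*pp)->next)
			if ((*pp)->key == key)
				return;		/* duplicate: nothing to add */

		/* *pp is the NULL link to extend; empty and non-empty
		 * lists need no separate cases */
		entry = new_node(key);
		*pp = entry;
	}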
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9e8f19088d..9386976b675 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -515,10 +515,10 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = cfg->irq_2_pin; + struct irq_pin_list *entry; int replaced = 0; - while (entry) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -526,7 +526,6 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, /* every one is different, right? */ break; } - entry = entry->next; } /* why? call replace before add? */ -- cgit v1.2.3 From 4eea6fff612f54380dd642b045bf03ac0613fe3e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:32:15 -0700 Subject: x86/ioapic.c: clean up replace_pin_at_irq_node logic and comments There's no need for a control variable in replace_pin_at_irq_node(); it can just return if it finds the old apic/pin to replace. If the loop terminates, then it didn't find the old apic/pin, so it can add the new ones. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9386976b675..8245e62ed93 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -512,25 +512,22 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin * Reroute an IRQ to a different pin. */ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, - int oldapic, int oldpin, - int newapic, int newpin) + int oldapic, int oldpin, + int newapic, int newpin) { struct irq_pin_list *entry; - int replaced = 0; for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - replaced = 1; /* every one is different, right? */ - break; + return; } } - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq_node(cfg, node, newapic, newpin); + /* old apic/pin didn't exist, so just add new ones */ + add_pin_to_irq_node(cfg, node, newapic, newpin); } static void io_apic_modify_irq(struct irq_cfg *cfg, -- cgit v1.2.3 From 638f2f8c52a92c15ebda9e50d84c1ab56fc42e42 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:37:52 -0700 Subject: x86/ioapic.c: convert __target_IO_APIC_irq to conventional for() loop Use a normal for() loop in __target_IO_APIC_irq(). 
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8245e62ed93..17883cd8259 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2236,13 +2236,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq struct irq_pin_list *entry; u8 vector = cfg->vector; - entry = cfg->irq_2_pin; - for (;;) { + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; - if (!entry) - break; - apic = entry->apic; pin = entry->pin; /* @@ -2255,9 +2251,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; } } -- cgit v1.2.3 From e25371d60cb06a44d7a32d7966ab9bfbeacb9390 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 8 Jun 2009 03:49:01 -0700 Subject: x86/ioapic.c: unify ioapic_retrigger_irq() The 32 and 64-bit versions of ioapic_retrigger_irq() are identical except the 64-bit one takes vector_lock. vector_lock is defined and used on 32-bit too, so just use a common ioapic_retrigger_irq(). Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/apic/io_apic.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 17883cd8259..cf51b0b58c5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2178,7 +2178,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { @@ -2191,14 +2190,6 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - apic->send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif /* * Level and edge triggered IO-APIC interrupts need different handling, -- cgit v1.2.3 From 1dacc76d0014a034b8aca14237c127d7c19d7726 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 11:26:02 +0000 Subject: net/compat/wext: send different messages to compat tasks Wireless extensions have the unfortunate problem that events are multicast netlink messages, and are not independent of pointer size. Thus, currently 32-bit tasks on 64-bit platforms cannot properly receive events and fail with all kinds of strange problems, for instance wpa_supplicant never notices disassociations, due to the way the 64-bit event looks (to a 32-bit process); the fact that the address is all zeroes is lost, and it thinks instead it is 00:00:00:00:01:00. The same problem existed with the ioctls, until David Miller fixed those some time ago in an heroic effort. A different problem caused by this is that we cannot send the ASSOCREQIE/ASSOCRESPIE events because sending them causes a 32-bit wpa_supplicant on a 64-bit system to overwrite its internal information, which is worse than it not getting the information at all -- so we currently resort to sending a custom string event that it then parses. This, however, has a severe size limitation we are frequently hitting with modern access points; this limitation can be lifted after this patch by sending the correct binary, not custom, event.
A similar problem apparently happens for some other netlink users on x86_64 with 32-bit tasks due to the alignment for 64-bit quantities. In order to fix these problems, I have implemented a way to send compat messages to tasks. When sending an event, we send the non-compat event data together with the compat event data in skb_shinfo(main_skb)->frag_list. Then, when the event is read from the socket, the netlink code makes sure to pass out only the skb that is compatible with the task. This approach was suggested by David Miller; my original approach required always sending two skbs but that had various small problems. To determine whether compat is needed or not, I have used the MSG_CMSG_COMPAT flag, and adjusted the call path for recv and recvfrom to include it, even if those calls do not have a cmsg parameter. I have not solved one small part of the problem, and I don't think it is necessary to: if a 32-bit application uses read() rather than any form of recvmsg() it will still get the wrong (64-bit) event. However, applications do not actually do this, nor would it be a regression. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 4 ++-- arch/sparc/kernel/sys32.S | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 15874f9812c..7c4a94f4370 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -164,7 +164,7 @@ EXPORT(sysn32_call_table) PTR sys_connect PTR sys_accept PTR sys_sendto - PTR sys_recvfrom + PTR compat_sys_recvfrom PTR compat_sys_sendmsg /* 6045 */ PTR compat_sys_recvmsg PTR sys_shutdown diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 781e0f1e953..821fc978673 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -378,8 +378,8 @@ sys_call_table: PTR sys_getsockname PTR sys_getsockopt PTR sys_listen - PTR sys_recv /* 4175 */ - PTR sys_recvfrom + PTR compat_sys_recv /* 4175 */ + PTR compat_sys_recvfrom PTR compat_sys_recvmsg PTR sys_send PTR compat_sys_sendmsg diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index f061c4dda9e..3762f6c7894 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -121,7 +121,7 @@ SIGN2(sys32_syslog, sys_syslog, %o0, %o2) SIGN1(sys32_umask, sys_umask, %o0) SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2) SIGN1(sys32_sendto, sys_sendto, %o0) -SIGN1(sys32_recvfrom, sys_recvfrom, %o0) +SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2) SIGN2(sys32_connect, sys_connect, %o0, %o2) SIGN2(sys32_bind, sys_bind, %o0, %o2) -- cgit v1.2.3 From 2a549c364aa11e658ae14b71861d25474e5808cf Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Thu, 16 Jul 2009 19:58:42 +0800 Subject: crypto: s390 - Fix sha build failure Use struct s390_sha_ctx instead of sha1/sha256_state struct to fix s390 crypto build break.
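The breakage is a plain type mismatch (illustrative): generic struct sha1_state carries only count, state and buffer, so the s390-only KIMD function code cannot be reached through it:

	struct sha1_state *sctx = shash_desc_ctx(desc);
	sctx->func = KIMD_SHA_1;	/* no 'func' member in sha1_state:
					 * compile error */

The descriptor context on s390 is really a struct s390_sha_ctx, the type that does carry func; the fix below switches the import helpers back to it.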
Signed-off-by: Sachin Sant Signed-off-by: Herbert Xu --- arch/s390/crypto/sha1_s390.c | 2 +- arch/s390/crypto/sha256_s390.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 2c5ec7969e3..4a943789c20 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -59,7 +59,7 @@ static int sha1_export(struct shash_desc *desc, void *out) static int sha1_import(struct shash_desc *desc, const u8 *in) { - struct sha1_state *sctx = shash_desc_ctx(desc); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha1_state *ictx = in; sctx->count = ictx->count; diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 943a669452c..2bab5197789 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -55,7 +55,7 @@ static int sha256_export(struct shash_desc *desc, void *out) static int sha256_import(struct shash_desc *desc, const u8 *in) { - struct sha256_state *sctx = shash_desc_ctx(desc); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha256_state *ictx = in; sctx->count = ictx->count; -- cgit v1.2.3 From 254e0a6bff87ab8b22293c4bd1443507df698407 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:08:54 +0900 Subject: x86: Use get_desc_base() Use get_desc_base() to get the base address in desc_struct Signed-off-by: Akinobu Mita LKML-Reference: <20090718150853.GA11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 4 +--- arch/x86/kernel/step.c | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index b4f14c6c09d..37250fe490b 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -27,9 +27,7 @@ static void doublefault_fn(void) if (ptr_ok(gdt)) { gdt += GDT_ENTRY_TSS << 3; - tss = *(u16 *)(gdt+2); - tss += *(u8 *)(gdt+4) << 16; - tss += *(u8 *)(gdt+7) << 24; + tss = get_desc_base((struct desc_struct *)gdt); printk(KERN_EMERG "double fault, tss at %08lx\n", tss); if (ptr_ok(tss)) { diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index e8b9863ef8c..3149032ff10 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -4,6 +4,7 @@ #include #include #include +#include unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re * and APM bios ones we just ignore here. */ if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { - u32 *desc; + struct desc_struct *desc; unsigned long base; seg &= ~7UL; @@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re addr = -1L; /* bogus selector, access would fault */ else { desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); + base = get_desc_base(desc); /* 16-bit code segment? 
*/ - if (!((desc[1] >> 22) & 1)) + if (!desc->d) addr &= 0xffff; addr += base; } -- cgit v1.2.3 From fde0312d01b60a3fd5dc56e69a9613defbbc7097 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:09:56 +0900 Subject: x86: Remove unused patch_espfix_desc() patch_espfix_desc() is not used after commit dc4c2a0aed3b09f6e255bd5c3faa50fe6e0b2ded Signed-off-by: Akinobu Mita LKML-Reference: <20090718150955.GB11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 4 +--- arch/x86/kernel/traps.c | 21 --------------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bfd74c032fc..4da91ad69e0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -81,9 +81,7 @@ extern int panic_on_unrecovered_nmi; void math_error(void __user *); void math_emulate(struct math_emu_info *); -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long, unsigned long); -#else +#ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332f475..23679411020 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -786,27 +786,6 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) #endif } -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) -{ - struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); - unsigned long base = (kesp - uesp) & -THREAD_SIZE; - unsigned long new_kesp = kesp - base; - unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; - __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; - - /* Set up base for espfix segment */ - desc &= 0x00f0ff0000000000ULL; - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)base) << 32) & 0xff00000000000000ULL) | - ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | - (lim_pages & 0xffff); - *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; - - return new_kesp; -} -#endif - asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } -- cgit v1.2.3 From 57594742a2b545f8f114cda34f15650be8ae976d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:11:06 +0900 Subject: x86: Introduce set_desc_base() and set_desc_limit() Rename set_base()/set_limit() to set_desc_base()/set_desc_limit() and rewrite them in C. These are naturally introduced by the idea of get_desc_base()/get_desc_limit(). The conversion actually found the bug in apm_32.c: bad_bios_desc is written at run-time, but it is defined as a const variable.
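The bit-scattered encoding these helpers hide is easy to check in isolation (a user-space model of just the base fields; the real desc_struct interleaves limit, type and flag bits between base1 and base2):

	#include <assert.h>
	#include <stdint.h>

	struct desc_base_model {
		uint16_t base0;		/* base bits 15..0  */
		uint8_t  base1;		/* base bits 23..16 */
		uint8_t  base2;		/* base bits 31..24 */
	};

	int main(void)
	{
		unsigned long base = 0x12345678;
		struct desc_base_model d;

		d.base0 = base & 0xffff;
		d.base1 = (base >> 16) & 0xff;
		d.base2 = (base >> 24) & 0xff;

		assert((d.base0 | ((unsigned long)d.base1 << 16) |
			((unsigned long)d.base2 << 24)) == base);
		return 0;
	}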
Signed-off-by: Akinobu Mita LKML-Reference: <20090718151105.GC11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 13 +++++++++++++ arch/x86/include/asm/stackprotector.h | 4 +--- arch/x86/include/asm/system.h | 27 --------------------------- arch/x86/kernel/apm_32.c | 18 +++++++++--------- 4 files changed, 23 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c993e9e0fed..e8de2f6f5ca 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -291,11 +291,24 @@ static inline unsigned long get_desc_base(const struct desc_struct *desc) return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); } +static inline void set_desc_base(struct desc_struct *desc, unsigned long base) +{ + desc->base0 = base & 0xffff; + desc->base1 = (base >> 16) & 0xff; + desc->base2 = (base >> 24) & 0xff; +} + static inline unsigned long get_desc_limit(const struct desc_struct *desc) { return desc->limit0 | (desc->limit << 16); } +static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) +{ + desc->limit0 = limit & 0xffff; + desc->limit = (limit >> 16) & 0xf; +} + static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2d742c6e15..cdc5e0b126a 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -90,9 +90,7 @@ static inline void setup_stack_canary_segment(int cpu) struct desc_struct desc; desc = gdt_table[GDT_ENTRY_STACK_CANARY]; - desc.base0 = canary & 0xffff; - desc.base1 = (canary >> 16) & 0xff; - desc.base2 = (canary >> 24) & 0xff; + set_desc_base(&desc, canary); write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); #endif } diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 643c59b4bc6..75c49c782e2 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -150,33 +150,6 @@ do { \ #endif #ifdef __KERNEL__ -#define _set_base(addr, base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while (0) - -#define _set_limit(addr, limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while (0) - -#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) -#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) extern void native_load_gs_index(unsigned); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9a33a..b5e841bd60d 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; +static struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -2337,8 +2337,8 @@ static int __init 
apm_init(void) * This is for buggy BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. */ - set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); - _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + set_desc_base(&bad_bios_desc, (unsigned long)__va(0x40UL << 4)); + set_desc_limit(&bad_bios_desc, 4095 - (0x40 << 4)); /* * Set up the long jump entry point to the APM BIOS, which is called @@ -2358,12 +2358,12 @@ static int __init apm_init(void) * code to that CPU. */ gdt = get_cpu_gdt_table(0); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); + set_desc_base(&gdt[APM_CS >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); + set_desc_base(&gdt[APM_CS_16 >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_desc_base(&gdt[APM_DS >> 3], + (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); proc_create("apm", 0, NULL, &apm_file_ops); -- cgit v1.2.3 From 4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a Mon Sep 17 00:00:00 2001 From: Jason Yeh Date: Wed, 8 Jul 2009 13:49:38 +0200 Subject: oprofile: Implement performance counter multiplexing The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than the hardware provides counters for. This is realized by switching between events at a user-specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms. If the number of events to profile is higher than the number of hardware counters available, the patch will schedule a work queue that switches the event counter and re-writes the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can be easily extended for other models and architectures. There are follow-on patches that rework parts of this patch. Signed-off-by: Jason Yeh Signed-off-by: Robert Richter --- arch/Kconfig | 12 +++ arch/x86/oprofile/nmi_int.c | 162 ++++++++++++++++++++++++++++++++++++-- arch/x86/oprofile/op_counter.h | 2 +- arch/x86/oprofile/op_model_amd.c | 110 ++++++++++++++++++++++---- arch/x86/oprofile/op_model_p4.c | 4 + arch/x86/oprofile/op_model_ppro.c | 2 + arch/x86/oprofile/op_x86_model.h | 7 ++ 7 files changed, 279 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 99193b16023..beea3ccebb5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,18 @@ config OPROFILE_IBS If unsure, say N. +config OPROFILE_EVENT_MULTIPLEX + bool "OProfile multiplexing support (EXPERIMENTAL)" + default n + depends on OPROFILE && X86 + help + The number of hardware counters is limited. The multiplexing + feature enables OProfile to gather more events than the + hardware provides counters for. This is realized by switching + between events at a user-specified time interval. + + If unsure, say N.
+ config HAVE_OPROFILE bool diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index fca8dc94531..e54f6a0b35a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,11 +1,14 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Robert Richter + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include @@ -24,6 +27,12 @@ #include "op_counter.h" #include "op_x86_model.h" + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DEFINE_PER_CPU(int, switch_index); +#endif + + static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +extern atomic_t multiplex_counter; +#endif + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, @@ -95,6 +111,11 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; +#endif } } @@ -103,6 +124,9 @@ static int allocate_msrs(void) int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; +#endif int i; for_each_possible_cpu(i) { @@ -118,6 +142,14 @@ static int allocate_msrs(void) success = 0; break; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) { + success = 0; + break; + } +#endif } if (!success) @@ -126,6 +158,25 @@ static int allocate_msrs(void) return success; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } +} + +#endif + static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); @@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + nmi_setup_cpu_mux(msrs); +#endif spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -173,14 +227,52 @@ static int nmi_setup(void) memcpy(per_cpu(cpu_msrs, cpu).controls, per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +#endif } - } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + 
unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + rdmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + wrmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +#endif + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __get_cpu_var(switch_index) = 0; +#endif } static void nmi_shutdown(void) @@ -252,16 +347,15 @@ static void nmi_stop(void) on_each_cpu(nmi_cpu_stop, NULL, 1); } -struct op_counter_config counter_config[OP_MAX_COUNTER]; - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; +#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized @@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) */ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) continue; +#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); @@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 
0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + +#endif + #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) @@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __raw_get_cpu_var(switch_index) = 0; +#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + ops->switch_events = nmi_switch_event; +#endif if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165..e28398df0df 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f676f8825a3..fdbed3a0c87 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,12 +9,15 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter - * @author Barry Kasindorf + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include #include #include +#include #include #include @@ -25,12 +28,23 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_CONTROLS 32 +#else +#define NUM_VIRT_COUNTERS NUM_COUNTERS +#define NUM_VIRT_CONTROLS NUM_CONTROLS +#endif + #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -static unsigned long reset_value[NUM_COUNTERS]; +static unsigned long reset_value[NUM_VIRT_COUNTERS]; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DECLARE_PER_CPU(int, switch_index); +#endif #ifdef CONFIG_OPROFILE_IBS @@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_CONTROLS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +#endif } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; + /* setup reset_value */ + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + } else { + reset_value[i] = 0; + } + } + /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) @@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled && msrs->counters[i].addr) { - reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, - 
-(u64)counter_config[i].count); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (counter_config[offset].enabled && msrs->counters[i].addr) { + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + + /* setup control registers */ rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[i]); + val |= op_x86_get_ctrl(model, &counter_config[offset]); + wrmsrl(msrs->controls[i].addr, val); + } + } +} + + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (counter_config[offset].enabled) { + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[offset]); wrmsrl(msrs->controls[i].addr, val); - } else { - reset_value[i] = 0; } } } +#endif + + #ifdef CONFIG_OPROFILE_IBS static inline int @@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (!reset_value[offset]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); + oprofile_add_sample(regs, offset); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); } op_amd_handle_ibs(regs, msrs); @@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { - if (reset_value[i]) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (reset_value[offset]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); @@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) +#else if (!reset_value[i]) +#endif continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -463,6 +540,8 @@ static void op_amd_exit(void) {} struct op_x86_model_spec const op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_virt_counters = NUM_VIRT_COUNTERS, + .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, @@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = { .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_amd_switch_ctrl, +#endif }; diff --git a/arch/x86/oprofile/op_model_p4.c 
b/arch/x86/oprofile/op_model_p4.c index 5921b7fc724..65b9237cde8 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,6 +698,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, + .num_virt_counters = NUM_COUNTERS_HT2, + .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -710,6 +712,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, + .num_virt_counters = NUM_COUNTERS_NON_HT, + .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 570d717c330..098cbca5c0b 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,6 +206,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = 2, .num_controls = 2, + .num_virt_counters = 2, + .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 505489873b9..0d07d23cb06 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -23,6 +23,7 @@ struct op_msr { struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; @@ -35,6 +36,8 @@ struct oprofile_operations; struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; + unsigned int num_virt_counters; + unsigned int num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); @@ -47,6 +50,10 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; struct op_counter_config; -- cgit v1.2.3 From 5e766e3e433fa2d5d2fdfd8e2432804c91393387 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 8 Jul 2009 14:54:17 +0200 Subject: x86/oprofile: Fix usage of NUM_CONTROLS/NUM_COUNTERS macros Use the corresponding macros when iterating over counter and control registers. Since NUM_CONTROLS and NUM_COUNTERS are equal for AMD CPUs, the fix is mostly cosmetic.
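
To illustrate the bug class, a minimal user-space sketch (made-up names, not the kernel code): if the release loop iterated with a different macro than the reserve loop, resources would leak as soon as the two values diverge.

#include <stdio.h>

#define NUM_COUNTERS 4
#define NUM_CONTROLS 4	/* equal on AMD today, but conceptually distinct */

static int ctl_reserved[NUM_CONTROLS];

static void reserve_controls(void)
{
	for (int i = 0; i < NUM_CONTROLS; i++)
		ctl_reserved[i] = 1;
}

static void release_controls(void)
{
	/* must use NUM_CONTROLS here too; using NUM_COUNTERS only works
	 * by accident while the two macros happen to be equal */
	for (int i = 0; i < NUM_CONTROLS; i++)
		ctl_reserved[i] = 0;
}

int main(void)
{
	reserve_controls();
	release_controls();
	for (int i = 0; i < NUM_CONTROLS; i++)
		if (ctl_reserved[i])
			printf("leaked control %d\n", i);
	return 0;
}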
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index fdbed3a0c87..dcfd4505cac 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -99,7 +99,7 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_CONTROLS; + int hw_counter = i % NUM_COUNTERS; if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else @@ -366,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_CONTROLS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } -- cgit v1.2.3 From 82a225283fb0d9438549595d9e6f3ecc42b42ad6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 16:29:34 +0200 Subject: x86/oprofile: Use per_cpu() instead of __get_cpu_var() __get_cpu_var() calls smp_processor_id(). When the cpu id is already known, instead use per_cpu() to avoid generating the id again. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index e54f6a0b35a..8cd4658370b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -294,7 +294,7 @@ static void nmi_cpu_shutdown(void *dummy) { unsigned int v; int cpu = smp_processor_id(); - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); /* restoring APIC_LVTPC can trigger an apic error because the delivery * mode and vector nr combination can be illegal. That's by design: on @@ -307,7 +307,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - __get_cpu_var(switch_index) = 0; + per_cpu(switch_index, cpu) = 0; #endif } -- cgit v1.2.3 From 6bfccd099c2841e1c42530f1b6d2553bfa13be3a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 19:23:50 +0200 Subject: x86/oprofile: Fix initialization of switch_index Variable switch_index must be initialized for each cpu. This patch fixes the initialization by moving it to the per-cpu init function nmi_cpu_setup(). 
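
A minimal user-space sketch of the idea (an array stands in for the per-cpu variable; names are hypothetical): the index is reset inside the hook that runs on every cpu, not by a single global assignment that would only cover one cpu.

#include <stdio.h>

#define NR_CPUS 4

static int switch_index[NR_CPUS];	/* stands in for the per-cpu variable */

static void cpu_setup(int cpu)
{
	/* runs on every cpu during setup, so each cpu starts at set 0 */
	switch_index[cpu] = 0;
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_setup(cpu);
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d starts at set %d\n", cpu, switch_index[cpu]);
	return 0;
}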
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 8cd4658370b..b211d335e07 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -160,7 +160,7 @@ static int allocate_msrs(void) #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { int i; struct op_msr *multiplex = msrs->multiplex; @@ -173,8 +173,15 @@ static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) multiplex[i].saved = 0; } } + + per_cpu(switch_index, cpu) = 0; } +#else + +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } + #endif static void nmi_cpu_setup(void *dummy) @@ -184,9 +191,7 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - nmi_setup_cpu_mux(msrs); -#endif + nmi_cpu_setup_mux(cpu, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -662,9 +667,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - __raw_get_cpu_var(switch_index) = 0; -#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; -- cgit v1.2.3 From d8471ad3ab613a1ba7abd3aad46659de39a2871c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:04:43 +0200 Subject: oprofile: Introduce op_x86_phys_to_virt() This new function translates physical to virtual counter numbers. 
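
A user-space sketch of the translation (assuming 4 physical and 8 virtual counters; not the kernel code): the virtual number is just the current window offset plus the physical number.

#include <stdio.h>

static int switch_index;	/* per-cpu in the kernel */

static int phys_to_virt(int phys)
{
	return switch_index + phys;
}

int main(void)
{
	for (switch_index = 0; switch_index < 8; switch_index += 4)
		for (int phys = 0; phys < 4; phys++)
			printf("set %d: phys %d -> virt %d\n",
			       switch_index / 4, phys, phys_to_virt(phys));
	return 0;
}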
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 43 +++++++++++---------- arch/x86/oprofile/op_model_amd.c | 80 ++++++++++++++++------------------------ arch/x86/oprofile/op_x86_model.h | 1 + 3 files changed, 55 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b211d335e07..02b57b8d0e6 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -27,12 +27,6 @@ #include "op_counter.h" #include "op_x86_model.h" - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -DEFINE_PER_CPU(int, switch_index); -#endif - - static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -103,6 +97,21 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static DEFINE_PER_CPU(int, switch_index); + +inline int op_x86_phys_to_virt(int phys) +{ + return __get_cpu_var(switch_index) + phys; +} + +#else + +inline int op_x86_phys_to_virt(int phys) { return phys; } + +#endif + static void free_msrs(void) { int i; @@ -248,31 +257,25 @@ static int nmi_setup(void) static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) { - unsigned int si = __get_cpu_var(switch_index); struct op_msr *multiplex = msrs->multiplex; - unsigned int i; + int i; for (i = 0; i < model->num_counters; ++i) { - int offset = i + si; - if (multiplex[offset].addr) { - rdmsrl(multiplex[offset].addr, - multiplex[offset].saved); - } + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + rdmsrl(multiplex[virt].addr, multiplex[virt].saved); } } static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) { - unsigned int si = __get_cpu_var(switch_index); struct op_msr *multiplex = msrs->multiplex; - unsigned int i; + int i; for (i = 0; i < model->num_counters; ++i) { - int offset = i + si; - if (multiplex[offset].addr) { - wrmsrl(multiplex[offset].addr, - multiplex[offset].saved); - } + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + wrmsrl(multiplex[virt].addr, multiplex[virt].saved); } } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index dcfd4505cac..67f830d12e0 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -42,9 +42,6 @@ #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) static unsigned long reset_value[NUM_VIRT_COUNTERS]; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -DECLARE_PER_CPU(int, switch_index); -#endif #ifdef CONFIG_OPROFILE_IBS @@ -141,21 +138,20 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (counter_config[offset].enabled && msrs->counters[i].addr) { - /* setup counter registers */ - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); - - /* setup control registers */ - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[offset]); - wrmsrl(msrs->controls[i].addr, val); - } + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + if (!msrs->counters[i].addr) + continue; + + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); + + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= 
op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } } @@ -170,14 +166,13 @@ static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - int offset = i + __get_cpu_var(switch_index); - if (counter_config[offset].enabled) { - /* setup control registers */ - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[offset]); - wrmsrl(msrs->controls[i].addr, val); - } + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } } @@ -292,19 +287,15 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (!reset_value[offset]) + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, offset); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + oprofile_add_sample(regs, virt); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); } op_amd_handle_ibs(regs, msrs); @@ -319,16 +310,11 @@ static void op_amd_start(struct op_msrs const * const msrs) int i; for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - int offset = i + __get_cpu_var(switch_index); -#else - int offset = i; -#endif - if (reset_value[offset]) { - rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } + if (!reset_value[op_x86_phys_to_virt(i)]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } op_amd_start_ibs(); @@ -344,11 +330,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) -#else - if (!reset_value[i]) -#endif + if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 0d07d23cb06..e874dc3565a 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -60,6 +60,7 @@ struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); +extern int op_x86_phys_to_virt(int phys); extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; -- cgit v1.2.3 From 7e7478c6bc0e011d2854b21f190cc3a1dba89905 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:09:53 +0200 Subject: oprofile: Grouping multiplexing code in op_model_amd.c This patch moves some multiplexing code to the new function op_mux_fill_in_addresses(). Also, the whole multiplexing code is now at a single location. 
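
The grouping follows a common kernel pattern, sketched below with a hypothetical CONFIG_FEATURE_X: one #ifdef block holds the real helpers, the #else block holds empty stubs, and call sites stay free of conditionals.

/* compile with or without -DCONFIG_FEATURE_X */
#ifdef CONFIG_FEATURE_X
static void feature_fill_in(void)
{
	/* real multiplexing work would live here */
}
#else
static inline void feature_fill_in(void) { }
#endif

static void common_fill_in(void)
{
	/* ...common work... */
	feature_fill_in();	/* no #ifdef at the call site */
}

int main(void)
{
	common_fill_in();
	return 0;
}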
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 75 ++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 67f830d12e0..644980f0392 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -74,6 +74,45 @@ static struct op_ibs_config ibs_config; #endif +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_mux_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +} + +static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); + } +} + +#else + +static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } + +#endif + /* functions for op_amd_spec */ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) @@ -94,15 +133,7 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) msrs->controls[i].addr = 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_COUNTERS; - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; - else - msrs->multiplex[i].addr = 0; - } -#endif + op_mux_fill_in_addresses(msrs); } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -155,30 +186,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - int virt = op_x86_phys_to_virt(i); - if (!counter_config[virt].enabled) - continue; - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[virt]); - wrmsrl(msrs->controls[i].addr, val); - } -} - -#endif - - #ifdef CONFIG_OPROFILE_IBS static inline int @@ -535,6 +542,6 @@ struct op_x86_model_spec const op_amd_spec = { .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - .switch_ctrl = &op_amd_switch_ctrl, + .switch_ctrl = &op_mux_switch_ctrl, #endif }; -- cgit v1.2.3 From 6ab82f958a5dca591a6ea17a3ca6f2aca06f4f2f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:40:04 +0200 Subject: x86/oprofile: Implement multiplexing setup/shutdown functions This patch implements nmi_setup_mux() and nmi_shutdown_mux() functions to setup/shutdown multiplexing. Multiplexing code in nmi_int.c is now much more separated. 
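
A simplified user-space sketch of the pairing (malloc/free stand in for the kernel allocations; names are made up): shutdown must be safe to call after a partial setup, so a setup failure needs no separate unwind path.

#include <stdlib.h>

#define NR_CPUS 4

static void *mux_buf[NR_CPUS];

static void shutdown_mux(void)
{
	for (int i = 0; i < NR_CPUS; i++) {
		free(mux_buf[i]);	/* free(NULL) is a no-op */
		mux_buf[i] = NULL;
	}
}

static int setup_mux(size_t size)
{
	for (int i = 0; i < NR_CPUS; i++) {
		mux_buf[i] = malloc(size);
		if (!mux_buf[i])
			return 0;	/* caller cleans up via shutdown_mux() */
	}
	return 1;
}

int main(void)
{
	if (setup_mux(64)) {
		/* ...use the buffers... */
	}
	shutdown_mux();	/* safe in both the success and the failure case */
	return 0;
}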
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 76 ++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 02b57b8d0e6..674fa37d150 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -106,9 +106,35 @@ inline int op_x86_phys_to_virt(int phys) return __get_cpu_var(switch_index) + phys; } +static void nmi_shutdown_mux(void) +{ + int i; + for_each_possible_cpu(i) { + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; + per_cpu(switch_index, i) = 0; + } +} + +static int nmi_setup_mux(void) +{ + size_t multiplex_size = + sizeof(struct op_msr) * model->num_virt_counters; + int i; + for_each_possible_cpu(i) { + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) + return 0; + } + return 1; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } +static inline void nmi_shutdown_mux(void) { } +static inline int nmi_setup_mux(void) { return 1; } #endif @@ -120,51 +146,27 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - kfree(per_cpu(cpu_msrs, i).multiplex); - per_cpu(cpu_msrs, i).multiplex = NULL; -#endif } } static int allocate_msrs(void) { - int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; -#endif int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).counters) { - success = 0; - break; - } + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).counters) + return 0; per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).controls) { - success = 0; - break; - } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - per_cpu(cpu_msrs, i).multiplex = - kmalloc(multiplex_size, GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).multiplex) { - success = 0; - break; - } -#endif + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).controls) + return 0; } - if (!success) - free_msrs(); - - return success; + return 1; } #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX @@ -218,11 +220,15 @@ static int nmi_setup(void) int cpu; if (!allocate_msrs()) - return -ENOMEM; + err = -ENOMEM; + else if (!nmi_setup_mux()) + err = -ENOMEM; + else + err = register_die_notifier(&profile_exceptions_nb); - err = register_die_notifier(&profile_exceptions_nb); if (err) { free_msrs(); + nmi_shutdown_mux(); return err; } @@ -314,9 +320,6 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - per_cpu(switch_index, cpu) = 0; -#endif } static void nmi_shutdown(void) @@ -326,6 +329,7 @@ static void nmi_shutdown(void) nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 1); unregister_die_notifier(&profile_exceptions_nb); + nmi_shutdown_mux(); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); -- cgit v1.2.3 From 48fb4b46712c7d3e8adc79826311abd9ccbf7f1d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_setup_cpu_mux() in 
nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 674fa37d150..b1edfc922e7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -130,11 +130,30 @@ static int nmi_setup_mux(void) return 1; } +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } + + per_cpu(switch_index, cpu) = 0; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } #endif @@ -169,32 +188,6 @@ static int allocate_msrs(void) return 1; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) -{ - int i; - struct op_msr *multiplex = msrs->multiplex; - - for (i = 0; i < model->num_virt_counters; ++i) { - if (counter_config[i].enabled) { - multiplex[i].saved = -(u64)counter_config[i].count; - } else { - multiplex[i].addr = 0; - multiplex[i].saved = 0; - } - } - - per_cpu(switch_index, cpu) = 0; -} - -#else - -static inline void -nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } - -#endif - static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); -- cgit v1.2.3 From d0f585dd20010f8479e56b5c6f391ef18e26877e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_cpu_save/restore_mpx_registers() in nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. 
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 52 +++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b1edfc922e7..f38c5cf0fdb 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -147,6 +147,30 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) per_cpu(switch_index, cpu) = 0; } +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + rdmsrl(multiplex[virt].addr, multiplex[virt].saved); + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + wrmsrl(multiplex[virt].addr, multiplex[virt].saved); + } +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -252,34 +276,6 @@ static int nmi_setup(void) return 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - rdmsrl(multiplex[virt].addr, multiplex[virt].saved); - } -} - -static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - wrmsrl(multiplex[virt].addr, multiplex[virt].saved); - } -} - -#endif - static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; -- cgit v1.2.3 From b28d1b923ab52d535c0719155dccf3b3d98bab9f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:38:49 +0200 Subject: x86/oprofile: Moving nmi_cpu_switch() in nmi_int.c This patch moves some code in nmi_int.c to get a single separate multiplexing code section. 
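
A self-contained sketch of the switch sequence (plain C with arrays standing in for the MSRs; the constants are made up): save the active set, advance the window, reload the next set.

#include <stdio.h>

#define NUM_COUNTERS 2
#define NUM_VIRT     6

static long long hw[NUM_COUNTERS];	/* stands in for the hardware counters */
static long long saved[NUM_VIRT];	/* one slot per virtual counter */
static int switch_index;

static void mux_switch(void)
{
	for (int i = 0; i < NUM_COUNTERS; i++)	/* save the active set */
		saved[switch_index + i] = hw[i];

	switch_index += NUM_COUNTERS;		/* advance the window */
	if (switch_index >= NUM_VIRT)
		switch_index = 0;

	for (int i = 0; i < NUM_COUNTERS; i++)	/* load the next set */
		hw[i] = saved[switch_index + i];
}

int main(void)
{
	for (int round = 0; round < 4; round++) {
		printf("round %d: window starts at virt %d\n",
		       round, switch_index);
		mux_switch();
	}
	return 0;
}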
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 144 +++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f38c5cf0fdb..998c7dca31e 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -97,6 +97,29 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) } } +static void nmi_cpu_start(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->start(msrs); +} + +static int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 1); + return 0; +} + +static void nmi_cpu_stop(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->stop(msrs); +} + +static void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 1); +} + #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static DEFINE_PER_CPU(int, switch_index); @@ -171,6 +194,53 @@ static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) } } +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -325,29 +395,6 @@ static void nmi_shutdown(void) put_cpu_var(cpu_msrs); } -static void nmi_cpu_start(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->start(msrs); -} - -static int nmi_start(void) -{ - on_each_cpu(nmi_cpu_start, NULL, 1); - return 0; -} - -static void nmi_cpu_stop(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->stop(msrs); -} - -static void nmi_stop(void) -{ - on_each_cpu(nmi_cpu_stop, NULL, 1); -} - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; @@ -379,57 +426,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void nmi_cpu_switch(void *dummy) -{ - int cpu = smp_processor_id(); - int si = per_cpu(switch_index, cpu); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - nmi_cpu_stop(NULL); - nmi_cpu_save_mpx_registers(msrs); - - /* move to next set */ - si += model->num_counters; - if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) - per_cpu(switch_index, cpu) = 0; - else - per_cpu(switch_index, cpu) = si; - - model->switch_ctrl(model, msrs); - nmi_cpu_restore_mpx_registers(msrs); - - nmi_cpu_start(NULL); -} - - -/* - * Quick check to see if multiplexing is necessary. - * The check should be sufficient since counters are used - * in ordre. 
- */ -static int nmi_multiplex_on(void) -{ - return counter_config[model->num_counters].count ? 0 : -EINVAL; -} - -static int nmi_switch_event(void) -{ - if (!model->switch_ctrl) - return -ENOSYS; /* not implemented */ - if (nmi_multiplex_on() < 0) - return -EINVAL; /* not necessary */ - - on_each_cpu(nmi_cpu_switch, NULL, 1); - - atomic_inc(&multiplex_counter); - - return 0; -} - -#endif - #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) -- cgit v1.2.3 From 259a83a8abdb9d2664819ec80ad12ebaeb251e32 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 15:12:35 +0200 Subject: x86/oprofile: Remove const qualifier from struct op_x86_model_spec This patch removes the const qualifier from struct op_x86_model_spec to make model parameters changable. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 ++-- arch/x86/oprofile/op_model_amd.c | 2 +- arch/x86/oprofile/op_model_p4.c | 4 ++-- arch/x86/oprofile/op_model_ppro.c | 2 +- arch/x86/oprofile/op_x86_model.h | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 998c7dca31e..826f391b422 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -27,7 +27,7 @@ #include "op_counter.h" #include "op_x86_model.h" -static struct op_x86_model_spec const *model; +static struct op_x86_model_spec *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -542,7 +542,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ + struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 644980f0392..39604b429d6 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -526,7 +526,7 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ -struct op_x86_model_spec const op_amd_spec = { +struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .num_virt_counters = NUM_VIRT_COUNTERS, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 65b9237cde8..40df028d0d9 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -695,7 +695,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) #ifdef CONFIG_SMP -struct op_x86_model_spec const op_p4_ht2_spec = { +struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .num_virt_counters = NUM_COUNTERS_HT2, @@ -709,7 +709,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { }; #endif -struct op_x86_model_spec const op_p4_spec = { +struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .num_virt_counters = NUM_COUNTERS_NON_HT, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 098cbca5c0b..659f3b6f86f 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -203,7 +203,7 @@ static void ppro_shutdown(struct op_msrs const * const msrs) } -struct op_x86_model_spec const op_ppro_spec = { +struct op_x86_model_spec 
op_ppro_spec = { .num_counters = 2, .num_controls = 2, .num_virt_counters = 2, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index e874dc3565a..0c886fa0369 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -62,10 +62,10 @@ extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern int op_x86_phys_to_virt(int phys); -extern struct op_x86_model_spec const op_ppro_spec; -extern struct op_x86_model_spec const op_p4_spec; -extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_ppro_spec; +extern struct op_x86_model_spec op_p4_spec; +extern struct op_x86_model_spec op_p4_ht2_spec; +extern struct op_x86_model_spec op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; #endif /* OP_X86_MODEL_H */ -- cgit v1.2.3 From 2904a527575344a804fdd82b1f8d09a8731d8d49 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 12:33:41 +0200 Subject: x86/oprofile: Remove unused num_virt_controls from struct op_x86_model_spec The member num_virt_controls of struct op_x86_model_spec is not used. This patch removes it. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 1 - arch/x86/oprofile/op_model_p4.c | 2 -- arch/x86/oprofile/op_model_ppro.c | 1 - arch/x86/oprofile/op_x86_model.h | 1 - 4 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39604b429d6..dce69b5979e 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -530,7 +530,6 @@ struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .num_virt_counters = NUM_VIRT_COUNTERS, - .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 40df028d0d9..0a4f2deb9e8 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -699,7 +699,6 @@ struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .num_virt_counters = NUM_COUNTERS_HT2, - .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -713,7 +712,6 @@ struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .num_virt_counters = NUM_COUNTERS_NON_HT, - .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 659f3b6f86f..753a02ab215 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -207,7 +207,6 @@ struct op_x86_model_spec op_ppro_spec = { .num_counters = 2, .num_controls = 2, .num_virt_counters = 2, - .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 0c886fa0369..4e2e7c2c519 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -37,7 +37,6 @@ struct op_x86_model_spec { unsigned int num_counters; 
unsigned int num_controls; unsigned int num_virt_counters; - unsigned int num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); -- cgit v1.2.3 From 52471c67ee2fa5ed6f700ef57bf27833c63b2192 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 6 Jul 2009 14:43:55 +0200 Subject: x86/oprofile: Modify initialization of num_virt_counters Models that do not yet support counter multiplexing have to setup num_virt_counters. This patch implements the setup from num_counters if num_virt_counters is not set. Thus, num_virt_counters must be setup only for multiplexing support. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 3 +++ arch/x86/oprofile/op_model_p4.c | 2 -- arch/x86/oprofile/op_model_ppro.c | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 826f391b422..82ee29517f1 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -674,6 +674,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (ret) return ret; + if (!model->num_virt_counters) + model->num_virt_counters = model->num_counters; + init_sysfs(); using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 0a4f2deb9e8..ac6b354becd 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,7 +698,6 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, - .num_virt_counters = NUM_COUNTERS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -711,7 +710,6 @@ struct op_x86_model_spec op_p4_ht2_spec = { struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, - .num_virt_counters = NUM_COUNTERS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 753a02ab215..4899215999d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,7 +206,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec op_ppro_spec = { .num_counters = 2, .num_controls = 2, - .num_virt_counters = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, -- cgit v1.2.3 From 39e97f40c3a5e71de0532368deaa683e09b74ba2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 15:11:45 +0200 Subject: x86/oprofile: Add function has_mux() to check multiplexing support The check is used to prevent running multiplexing code for models not supporting multiplexing. Before, the code was running but without effect. 
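
A sketch of the guard pattern (hypothetical user-space C, not the kernel code): every multiplexing helper returns early when the model provides no switch_ctrl hook.

struct model_spec {
	void (*switch_ctrl)(void);
};

static struct model_spec *model;

static int has_mux(void)
{
	return model && model->switch_ctrl;
}

static void some_mux_helper(void)
{
	if (!has_mux())
		return;		/* nothing to do on this model */
	model->switch_ctrl();
}

int main(void)
{
	some_mux_helper();	/* safe even while model is still NULL */
	return 0;
}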
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 82ee29517f1..dca7240aeb2 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -124,6 +124,11 @@ static void nmi_stop(void) static DEFINE_PER_CPU(int, switch_index); +static inline int has_mux(void) +{ + return !!model->switch_ctrl; +} + inline int op_x86_phys_to_virt(int phys) { return __get_cpu_var(switch_index) + phys; @@ -132,6 +137,10 @@ inline int op_x86_phys_to_virt(int phys) static void nmi_shutdown_mux(void) { int i; + + if (!has_mux()) + return; + for_each_possible_cpu(i) { kfree(per_cpu(cpu_msrs, i).multiplex); per_cpu(cpu_msrs, i).multiplex = NULL; @@ -144,12 +153,17 @@ static int nmi_setup_mux(void) size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; int i; + + if (!has_mux()) + return 1; + for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).multiplex = kmalloc(multiplex_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).multiplex) return 0; } + return 1; } @@ -158,6 +172,9 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) int i; struct op_msr *multiplex = msrs->multiplex; + if (!has_mux()) + return; + for (i = 0; i < model->num_virt_counters; ++i) { if (counter_config[i].enabled) { multiplex[i].saved = -(u64)counter_config[i].count; @@ -229,7 +246,7 @@ static int nmi_multiplex_on(void) static int nmi_switch_event(void) { - if (!model->switch_ctrl) + if (!has_mux()) return -ENOSYS; /* not implemented */ if (nmi_multiplex_on() < 0) return -EINVAL; /* not necessary */ -- cgit v1.2.3 From 5280514471c2803776701c43c027038decac1103 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 16:02:44 +0200 Subject: x86/oprofile: Enable multiplexing only if the model supports it This patch checks if the model supports multiplexing. Only then multiplexing will be enabled. The code is added to the common x86 initialization. 
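
A simplified sketch of the conditional registration (user-space C, made-up names): the optional callback is assigned only when the model supports it, so it stays NULL otherwise and callers can simply test the pointer.

#include <stdio.h>

struct operations {
	int (*switch_events)(void);	/* optional */
};

static int do_switch(void) { return 0; }

static int model_has_mux = 1;		/* pretend the model supports it */

static void mux_init(struct operations *ops)
{
	if (model_has_mux)
		ops->switch_events = do_switch;
	/* otherwise the pointer stays NULL and callers skip it */
}

int main(void)
{
	struct operations ops = { 0 };
	mux_init(&ops);
	printf("switch_events %s\n", ops.switch_events ? "set" : "unset");
	return 0;
}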
Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index dca7240aeb2..f0fb44725d8 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -258,6 +258,12 @@ static int nmi_switch_event(void) return 0; } +static inline void mux_init(struct oprofile_operations *ops) +{ + if (has_mux()) + ops->switch_events = nmi_switch_event; +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -265,6 +271,7 @@ static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } static inline void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } +static inline void mux_init(struct oprofile_operations *ops) { } #endif @@ -682,9 +689,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - ops->switch_events = nmi_switch_event; -#endif if (model->init) ret = model->init(ops); @@ -694,6 +698,8 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!model->num_virt_counters) model->num_virt_counters = model->num_counters; + mux_init(ops); + init_sysfs(); using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); -- cgit v1.2.3 From 4d015f79e972cea1761cfee8872b1c0992ccd8b2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 21:42:51 +0200 Subject: x86/oprofile: Implement mux_clone() To setup a counter for all cpus, its structure is cloned from cpu 0. This patch implements mux_clone() to do this part for multiplexing data. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f0fb44725d8..da6d2ab31c6 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -264,6 +264,16 @@ static inline void mux_init(struct oprofile_operations *ops) ops->switch_events = nmi_switch_event; } +static void mux_clone(int cpu) +{ + if (!has_mux()) + return; + + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +} + #else inline int op_x86_phys_to_virt(int phys) { return phys; } @@ -272,6 +282,7 @@ static inline int nmi_setup_mux(void) { return 1; } static inline void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } static inline void mux_init(struct oprofile_operations *ops) { } +static void mux_clone(int cpu) { } #endif @@ -350,20 +361,18 @@ static int nmi_setup(void) /* Assume saved/restored counters are the same on all CPUs */ model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); for_each_possible_cpu(cpu) { - if (cpu != 0) { - memcpy(per_cpu(cpu_msrs, cpu).counters, - per_cpu(cpu_msrs, 0).counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(per_cpu(cpu_msrs, cpu).controls, - per_cpu(cpu_msrs, 0).controls, - sizeof(struct op_msr) * model->num_controls); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - memcpy(per_cpu(cpu_msrs, cpu).multiplex, - per_cpu(cpu_msrs, 0).multiplex, - sizeof(struct op_msr) * model->num_virt_counters); -#endif - } + if (!cpu) + continue; + + memcpy(per_cpu(cpu_msrs, cpu).counters, + per_cpu(cpu_msrs, 0).counters, + sizeof(struct op_msr) * model->num_counters); + + memcpy(per_cpu(cpu_msrs, 
cpu).controls, + per_cpu(cpu_msrs, 0).controls, + sizeof(struct op_msr) * model->num_controls); + + mux_clone(cpu); } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; -- cgit v1.2.3 From 1b294f5960cd89e49eeb3e797860c552b03f2272 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 9 Jul 2009 14:56:25 +0200 Subject: oprofile: Adding switch counter to oprofile statistic variables This patch moves the multiplexing switch counter from x86 code to common oprofile statistic variables. Now the value will be available and usable for all architectures. The initialization and incrementation also moved to common code. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index da6d2ab31c6..7b3362f9abd 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -34,11 +34,6 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -extern atomic_t multiplex_counter; -#endif - struct op_counter_config counter_config[OP_MAX_COUNTER]; /* common functions */ @@ -253,8 +248,6 @@ static int nmi_switch_event(void) on_each_cpu(nmi_cpu_switch, NULL, 1); - atomic_inc(&multiplex_counter); - return 0; } -- cgit v1.2.3 From 61d149d5248ad7428801cdede0f5fcc2b90cd61c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 10 Jul 2009 15:47:17 +0200 Subject: x86/oprofile: Implement op_x86_virt_to_phys() This patch implements a common x86 function to convert virtual counter numbers to physical. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 6 ++++++ arch/x86/oprofile/op_model_amd.c | 2 +- arch/x86/oprofile/op_x86_model.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7b3362f9abd..5856e61cb09 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -129,6 +129,11 @@ inline int op_x86_phys_to_virt(int phys) return __get_cpu_var(switch_index) + phys; } +inline int op_x86_virt_to_phys(int virt) +{ + return virt % model->num_counters; +} + static void nmi_shutdown_mux(void) { int i; @@ -270,6 +275,7 @@ static void mux_clone(int cpu) #else inline int op_x86_phys_to_virt(int phys) { return phys; } +inline int op_x86_virt_to_phys(int virt) { return virt; } static inline void nmi_shutdown_mux(void) { } static inline int nmi_setup_mux(void) { return 1; } static inline void diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index dce69b5979e..1ea19829d98 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -81,7 +81,7 @@ static void op_mux_fill_in_addresses(struct op_msrs * const msrs) int i; for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = i % NUM_COUNTERS; + int hw_counter = op_x86_virt_to_phys(i); if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; else diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 4e2e7c2c519..b83776180c7 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -60,6 +60,7 @@ struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern int op_x86_phys_to_virt(int phys); +extern int op_x86_virt_to_phys(int virt); 
extern struct op_x86_model_spec op_ppro_spec; extern struct op_x86_model_spec op_p4_spec; -- cgit v1.2.3 From 11be1a7b54283021777f409aa983ce125945e67c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 10 Jul 2009 18:15:21 +0200 Subject: x86/oprofile: Add counter reservation check for virtual counters This patch adds a check for the availability of a counter. A virtual counter is used only if its physical counter is not reserved. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 5856e61cb09..cb88b1a0bd5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -435,15 +435,13 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) struct dentry *dir; char buf[4]; -#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized * sequentially in their struct assignment). */ - if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) continue; -#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); -- cgit v1.2.3 From c550091edd6fac2ed9dac1b30d986b6c58b216fa Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 16 Jul 2009 13:11:16 +0200 Subject: x86/oprofile: Small coding style fixes Some small coding style fixes. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 1ea19829d98..827beecb67a 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -144,11 +144,10 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* setup reset_value */ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if (counter_config[i].enabled) reset_value[i] = counter_config[i].count; - } else { + else reset_value[i] = 0; - } } /* clear all counters */ -- cgit v1.2.3 From 9fadfd1adff28a8895de8df9e8a778c44958840f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jul 2009 12:29:41 +0800 Subject: crypto: sha512-s390 - Add export/import support This patch adds export/import support to sha512-s390 (which includes sha384-s390). The exported type is defined by struct sha512_state, which is basically the entire descriptor state of sha512_generic. Since sha512-s390 only supports a 64-bit byte count the import function will reject anything that exceeds that. 
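For callers, export/import means a partial digest can be checkpointed and resumed without rehashing earlier data. A hedged sketch of how that looks through the generic shash API (kernel-context demo code; the function name sha512_checkpoint_demo and the two-part input are invented for illustration, and error handling is abbreviated):

/* Sketch: hash part1, export the partial state, import it and finish. */
#include <crypto/hash.h>
#include <crypto/sha.h>		/* struct sha512_state, SHA512_DIGEST_SIZE */
#include <linux/err.h>
#include <linux/slab.h>

static int sha512_checkpoint_demo(const u8 *part1, unsigned int len1,
				  const u8 *part2, unsigned int len2,
				  u8 digest[SHA512_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	struct sha512_state state;	/* the exported type from this patch */
	int err;

	tfm = crypto_alloc_shash("sha512", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_tfm;
	}
	desc->tfm = tfm;

	err = crypto_shash_init(desc);
	if (!err)
		err = crypto_shash_update(desc, part1, len1);
	if (!err)
		err = crypto_shash_export(desc, &state);	/* checkpoint */
	if (!err)
		err = crypto_shash_import(desc, &state);	/* resume later */
	if (!err)
		err = crypto_shash_update(desc, part2, len2);
	if (!err)
		err = crypto_shash_final(desc, digest);

	kfree(desc);
out_tfm:
	crypto_free_shash(tfm);
	return err;
}

Because statesize is sizeof(struct sha512_state), a state exported by one sha512 implementation can in principle be imported by another, which is exactly why the s390 import has to reject the 128-bit byte counts it cannot represent.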
Signed-off-by: Herbert Xu --- arch/s390/crypto/sha512_s390.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'arch') diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 83192bfc804..b4b3438ae77 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -13,7 +13,10 @@ * */ #include +#include +#include #include +#include #include #include "sha.h" @@ -37,12 +40,42 @@ static int sha512_init(struct shash_desc *desc) return 0; } +static int sha512_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *octx = out; + + octx->count[0] = sctx->count; + octx->count[1] = 0; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha512_import(struct shash_desc *desc, const u8 *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *ictx = in; + + if (unlikely(ictx->count[1])) + return -ERANGE; + sctx->count = ictx->count[0]; + + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_512; + return 0; +} + static struct shash_alg sha512_alg = { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_driver_name= "sha512-s390", @@ -78,7 +111,10 @@ static struct shash_alg sha384_alg = { .init = sha384_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_driver_name= "sha384-s390", -- cgit v1.2.3 From 3885123da8335dc6b67387e5e626acbffc56f664 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:50 +0900 Subject: swiotlb: remove unused swiotlb_alloc_boot() Nobody uses swiotlb_alloc_boot(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee4420..0ac7cd52478 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void *swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -- cgit v1.2.3 From bb52196be37ce154ddc50b1f39496146d181cbe7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:51 +0900 Subject: swiotlb: remove unused swiotlb_alloc() Nobody uses swiotlb_alloc(). 
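These swiotlb removals are safe because the functions were linker-level override hooks: the generic code shipped __weak defaults and architectures supplied strong replacements, so once the generic code stops calling a hook, the arch copy is dead weight. A rough illustration of the mechanism (two hypothetical translation units, not the actual swiotlb code):

/* lib/feature.c - generic code provides a weak default. */
void __attribute__((weak)) arch_feature_hook(void)
{
	/* generic fallback behaviour */
}

/* arch/foo/feature.c - an arch overrides it with a strong symbol;
 * the linker silently prefers this definition over the weak one. */
void arch_feature_hook(void)
{
	/* arch-specific behaviour */
}

When the last generic caller of arch_feature_hook() goes away, both definitions can be deleted with no Kconfig or header churn, which is what this series does for the x86 and powerpc swiotlb hooks.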
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ac7cd52478..ea675cfe76f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; -- cgit v1.2.3 From cf56e3f2e8a8d5b7bc719980869b0e7985c256f3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:52 +0900 Subject: swiotlb: remove swiotlb_arch_range_needs_mapping Nobody uses swiotlb_arch_range_needs_mapping(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea675cfe76f..165bd7f93bb 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -23,11 +23,6 @@ phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { -- cgit v1.2.3 From 02ca646e73f3cb9be69e339841b94edae675e248 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 23 Jul 2009 11:18:49 +0900 Subject: swiotlb: remove unnecessary swiotlb_bus_to_virt swiotlb_bus_to_virt is unncessary; we can use swiotlb_bus_to_phys and phys_to_virt instead. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/powerpc/kernel/dma-swiotlb.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 68ccf11e4f1..41534ae2f58 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,16 +24,6 @@ int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; -void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr) -{ - unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr)); - void *pageaddr = page_address(pfn_to_page(pfn)); - - if (pageaddr != NULL) - return pageaddr + (addr % PAGE_SIZE); - return NULL; -} - dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr + get_dma_direct_offset(hwdev); -- cgit v1.2.3 From 99becaca86d184a4433e9fde879ff97303d7669f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:54 +0900 Subject: x86: add dma_capable() to replace is_buffer_dma_capable() dma_capable() eventually replaces is_buffer_dma_capable(), which tells if a memory area is dma-capable or not. The problem of is_buffer_dma_capable() is that it doesn't take a pointer to struct device so it doesn't work for POWERPC. 
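The device argument is the whole point: the DMA mask is a per-device property, so a mask-only helper cannot express per-device constraints (the powerpc variant below additionally consults per-device dma_mapping_ops). A small userspace model of the check (demo types and values, not kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t dma_addr_t;

struct device {
	uint64_t *dma_mask;	/* per-device limit, e.g. 32-bit-only DMA */
};

static bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
	if (!dev->dma_mask)
		return false;
	return addr + size <= *dev->dma_mask;	/* same test as the patch */
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;
	struct device dev = { &mask32 };

	/* a buffer that ends above 4 GiB must bounce for this device */
	printf("below 4G ok: %d\n", dma_capable(&dev, 0x80000000ULL, 4096));
	printf("above 4G ok: %d\n", dma_capable(&dev, 0x100000000ULL, 4096));
	return 0;
}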
Signed-off-by: FUJITA Tomonori --- arch/x86/include/asm/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c3f9435f1c..adac59c8f69 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -55,6 +55,14 @@ extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag); +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) -- cgit v1.2.3 From a4c2baa6e148adfb27beaf16b6fb6d465b5b3acb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:55 +0900 Subject: x86: replace is_buffer_dma_capable() with dma_capable Signed-off-by: FUJITA Tomonori --- arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-gart_64.c | 5 ++--- arch/x86/kernel/pci-nommu.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506..3c945c0b350 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -147,7 +147,7 @@ again: return NULL; addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size)) { + if (addr + size > dma_mask) { __free_pages(page, get_order(size)); if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8f48e..98a827ee9ed 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - return force_iommu || - !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return force_iommu || !dma_capable(dev, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - return !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return !dma_capable(dev, addr, size); } /* Map a single continuous physical area into the IOMMU. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f3..c0a4222bf62 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { + if (hwdev && !dma_capable(hwdev, bus, size)) { if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", -- cgit v1.2.3 From a0b00ca84b3ecb9eebd62ad34880d8cc0d988c8a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:56 +0900 Subject: ia64: add dma_capable() to replace is_buffer_dma_capable() dma_capable() eventually replaces is_buffer_dma_capable(), which tells if a memory area is dma-capable or not. The problem of is_buffer_dma_capable() is that it doesn't take a pointer to struct device so it doesn't work for POWERPC. 
Signed-off-by: FUJITA Tomonori --- arch/ia64/include/asm/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 5a61b5c2e18..88d0f860394 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -69,6 +69,14 @@ dma_set_mask (struct device *dev, u64 mask) return 0; } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + extern int dma_get_cache_alignment(void); static inline void -- cgit v1.2.3 From 9a937c91eea31c4b594ea49a2a23c57003e04987 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:57 +0900 Subject: powerpc: add dma_capable() to replace is_buffer_dma_capable() dma_capable() eventually replaces is_buffer_dma_capable(), which tells if a memory area is dma-capable or not. The problem of is_buffer_dma_capable() is that it doesn't take a pointer to struct device so it doesn't work for POWERPC. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/powerpc/include/asm/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index b44aaabdd1a..6ff1f8581d7 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -424,6 +424,19 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #endif } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + if (ops->addr_needs_map && ops->addr_needs_map(dev, addr, size)) + return 0; + + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE -- cgit v1.2.3 From 30945fdf6a08f52370dab3bc162e96c4b4e36082 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:59 +0900 Subject: powerpc: remove unnecessary swiotlb_arch_address_needs_mapping swiotlb doesn't use swiotlb_arch_address_needs_mapping(); it uses dma_capable(). We can remove the unnecessary swiotlb_arch_address_needs_mapping(). We can remove swiotlb_addr_needs_map() and is_buffer_dma_capable() in swiotlb_pci_addr_needs_map() too; dma_capable() handles the features that both provide. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/powerpc/kernel/dma-swiotlb.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 41534ae2f58..a3bbe02cda9 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -35,29 +35,12 @@ phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) return baddr - get_dma_direct_offset(hwdev); } -/* - * Determine if an address needs bounce buffering via swiotlb. - * Going forward I expect the swiotlb code to generalize on using - * a dma_ops->addr_needs_map, and this function will move from here to the - * generic swiotlb code.
- */ -int -swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr, - size_t size) -{ - struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev); - - BUG_ON(!dma_ops); - return dma_ops->addr_needs_map(hwdev, addr, size); -} - /* * Determine if an address is reachable by a pci device, or if we must bounce. */ static int swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) { - u64 mask = dma_get_mask(hwdev); dma_addr_t max; struct pci_controller *hose; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -69,16 +52,9 @@ swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) if ((addr + size > max) | (addr < hose->dma_window_base_cur)) return 1; - return !is_buffer_dma_capable(mask, addr, size); -} - -static int -swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) -{ - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return 0; } - /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. Since @@ -94,7 +70,6 @@ struct dma_mapping_ops swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .addr_needs_map = swiotlb_addr_needs_map, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, -- cgit v1.2.3 From 8d4f5339d1ee4027c07e6b2a1cfa9dc41b0d383b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:01 +0900 Subject: x86, IA64, powerpc: add phys_to_dma() and dma_to_phys() This adds two functions, phys_to_dma() and dma_to_phys() to x86, IA64 and powerpc. swiotlb uses them. phys_to_dma() converts a physical address to a dma address. dma_to_phys() does the opposite. 
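The contract behind the pair is that they are exact inverses, differing from the identity only by a per-device offset on platforms like powerpc. A toy model of that round trip (illustrative only; get_dma_direct_offset() is stood in for by a plain struct field):

#include <assert.h>
#include <stdint.h>

typedef uint64_t dma_addr_t;
typedef uint64_t phys_addr_t;

struct device {
	uint64_t dma_offset;	/* stand-in for get_dma_direct_offset() */
};

static dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	return paddr + dev->dma_offset;
}

static phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
	return daddr - dev->dma_offset;
}

int main(void)
{
	struct device dev = { 0x80000000ULL };
	phys_addr_t paddr = 0x1000;

	/* swiotlb relies on this round trip holding for every device */
	assert(dma_to_phys(&dev, phys_to_dma(&dev, paddr)) == paddr);
	return 0;
}

On x86 and IA64 the offset is zero, which is why their versions in the diff below are plain identities.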
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/ia64/include/asm/dma-mapping.h | 10 ++++++++++ arch/powerpc/include/asm/dma-mapping.h | 10 ++++++++++ arch/x86/include/asm/dma-mapping.h | 10 ++++++++++ 3 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 88d0f860394..f91829de329 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -77,6 +77,16 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size <= *dev->dma_mask; } +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + extern int dma_get_cache_alignment(void); static inline void diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 6ff1f8581d7..0c34371ec49 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -437,6 +437,16 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size <= *dev->dma_mask; } +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_direct_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_direct_offset(dev); +} + #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index adac59c8f69..0ee770d23d0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -63,6 +63,16 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size <= *dev->dma_mask; } +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) -- cgit v1.2.3 From 8ab7ff42c9dd9231706a4ba7120eed22ff52a93b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:03 +0900 Subject: powerpc: remove unused swiotlb_phys_to_bus() and swiotlb_bus_to_phys() phys_to_dma() and dma_to_phys() are used instead of swiotlb_phys_to_bus() and swiotlb_bus_to_phys(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/powerpc/kernel/dma-swiotlb.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index a3bbe02cda9..e8a57de85bc 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,17 +24,6 @@ int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr + get_dma_direct_offset(hwdev); -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) - -{ - return baddr - get_dma_direct_offset(hwdev); -} - /* * Determine if an address is reachable by a pci device, or if we must bounce. 
*/ -- cgit v1.2.3 From b683d42693c4e92b838117f5c6f7b90bfa1525c9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:04 +0900 Subject: x86: remove unused swiotlb_phys_to_bus() and swiotlb_bus_to_phys() phys_to_dma() and dma_to_phys() are used instead of swiotlb_phys_to_bus() and swiotlb_bus_to_phys(). Signed-off-by: FUJITA Tomonori --- arch/x86/kernel/pci-swiotlb.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 165bd7f93bb..e8a35016115 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,16 +13,6 @@ int swiotlb __read_mostly; -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { -- cgit v1.2.3 From c1dc0b9c0c8979ce4d411caadff5c0d79dee58bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Aug 2009 11:28:21 +0200 Subject: debug lockups: Improve lockup detection When debugging a recent lockup bug I found various deficiencies in how our current lockup detection helpers work: - SysRq-L is not very efficient as it uses a workqueue, hence it cannot punch through hard lockups and cannot see through most soft lockups either. - The SysRq-L code depends on the NMI watchdog - which is off by default. - We don't print backtraces from the RCU code's built-in 'RCU state machine is stuck' debug code. This debug code tends to be one of the first (and only) mechanisms that show that a lockup has occurred. This patch changes the code so that we: - Trigger the NMI backtrace code from SysRq-L instead of using a workqueue (which cannot punch through hard lockups) - Trigger print-all-CPU-backtraces from the RCU lockup detection code Also decouple the backtrace printing code from the NMI watchdog: - Don't use variable-size cpumasks (they might not be initialized and are a bit more fragile anyway) - Trigger an NMI immediately via an IPI, instead of waiting for the NMI tick to occur. This is a lot faster and can produce more relevant backtraces. It will also work if the NMI watchdog is disabled. - Don't print the 'dazed and confused' message when we print a backtrace from the NMI - Do a show_regs() plus a dump_stack() to get maximum info out of the dump. Worst-case we get two stacktraces - which is not a big deal. Sometimes, if register content is corrupted, the precise stack walker in show_regs() won't give us a full backtrace - in this case dump_stack() will do it. Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63..1bb1ac20e9e 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,7 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_var_t backtrace_mask; +static cpumask_t backtrace_mask __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP @@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { + if (cpumask_test_cpu(cpu, &backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, backtrace_mask); + cpumask_clear_cpu(cpu, &backtrace_mask); + + rc = 1; } /* Could check oops_in_progress here too, but it's safer not to */ @@ -556,10 +558,14 @@ void __trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(backtrace_mask, cpu_online_mask); + cpumask_copy(&backtrace_mask, cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(backtrace_mask)) + if (cpumask_empty(&backtrace_mask)) break; mdelay(1); } -- cgit v1.2.3 From 25f6e89bedd29cc49bfa0d55497e91a671b9ae6e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 30 Jul 2009 23:21:18 +0200 Subject: x86: Remove superfluous NULL pointer check in destroy_irq() This takes care of the following entry from Dan's list: arch/x86/kernel/apic/io_apic.c +3241 destroy_irq(11) warning: variable dereferenced before check 'desc' Reported-by: Dan Carpenter Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Jonathan Corbet Cc: Eugene Teo Cc: Julia Lawall LKML-Reference: <200907302321.19086.bzolnier@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cf51b0b58c5..7e92a9212fd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3185,8 +3185,7 @@ void destroy_irq(unsigned int irq) cfg = desc->chip_data; dynamic_irq_cleanup(irq); /* connect back irq_cfg */ - if (desc) - desc->chip_data = cfg; + desc->chip_data = cfg; free_irte(irq); spin_lock_irqsave(&vector_lock, flags); -- cgit v1.2.3 From 47cab6a722d44c71c4f8224017ef548522243cf4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 09:31:54 +0200 Subject: debug lockups: Improve lockup detection, fix generic arch fallback As Andrew noted, my previous patch ("debug lockups: Improve lockup detection") broke/removed SysRq-L support from architectures that do not provide a __trigger_all_cpu_backtrace implementation. Restore a fallback path and clean up the SysRq-L machinery a bit: - Rename the arch method to arch_trigger_all_cpu_backtrace() - Simplify the define - Document the method a bit - in the hope of more architectures adding support for it. [ The patch touches Sparc code for the rename. ] Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: "David S.
Miller" LKML-Reference: <20090802140809.7ec4bb6b.akpm@linux-foundation.org> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/irq_64.h | 4 ++-- arch/sparc/kernel/process_64.c | 4 ++-- arch/x86/include/asm/nmi.h | 4 ++-- arch/x86/kernel/apic/nmi.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 1934f2cbf51..a0b443cb3c1 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,8 +89,8 @@ static inline unsigned long get_softint(void) return retval; } -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4041f94e772..18d67854a1b 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -251,7 +251,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -304,7 +304,7 @@ void __trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key, struct tty_struct *tty) { - __trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4af5..e63cf7d441e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace static inline void localise_nmi_watchdog(void) { diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1bb1ac20e9e..db7220220d0 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -554,7 +554,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; -- cgit v1.2.3 From ed8d9adf357ec331603fa1049510399812cea7e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 14:08:48 +0900 Subject: x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses This is very useful for some common things like 'get_current()' and 'get_thread_info()', which can be used multiple times in a function, and where the result is cacheable. tj: Added the magical undocumented "P" modifier to UP __percpu_arg() to force gcc to dereference the pointer value passed in via the "p" input constraint. Without this, percpu_read_stable() returns the address of the percpu variable. Also added comment explaining the difference between percpu_read() and percpu_read_stable(). Signed-off-by: Linus Torvalds Signed-off-by: Tejun Heo Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/percpu.h | 26 +++++++++++++++++++------- arch/x86/include/asm/thread_info.h | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index c68c361697e..4d447b732d8 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return percpu_read(current_task); + return percpu_read_stable(current_task); } #define current get_current() diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d8..04eacefcfd2 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -49,7 +49,7 @@ #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) #else -#define __percpu_arg(x) "%" #x +#define __percpu_arg(x) "%P" #x #endif /* @@ -104,36 +104,48 @@ do { \ } \ } while (0) -#define percpu_from_op(op, var) \ +#define percpu_from_op(op, var, constraint) \ ({ \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ : "=q" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ - : "m" (var)); \ + : constraint); \ break; \ default: __bad_percpu_size(); \ } \ ret__; \ }) -#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +/* + * percpu_read() makes gcc load the percpu variable every time it is + * accessed while percpu_read_stable() allows the value to be cached. + * percpu_read_stable() is more efficient and can be used if its value + * is guaranteed to be valid across cpus. The current users include + * get_current() and get_thread_info() both of which are actually + * per-thread variables implemented as per-cpu variables and thus + * stable for the duration of the respective task. + */ +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ + "m" (per_cpu__##var)) +#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ + "p" (&per_cpu__##var)) #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fad7d40b75f..a1bb5a114bf 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -213,7 +213,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - ti = (void *)(percpu_read(kernel_stack) + + ti = (void *)(percpu_read_stable(kernel_stack) + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } -- cgit v1.2.3 From 3e352aa8ee2bd48f1a19c7742810b3a4a7ba605e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:10:11 +0900 Subject: x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() put percpu variables in .page_aligned section without adding any alignment restrictions. 
Currently, this doesn't cause any problem because all users of the macros are explicitly page-aligned and page-sized, but it's much safer to enforce page alignment from the macros. After all, it's what they claim to do. Add __aligned(PAGE_SIZE) to DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() and drop explicit alignment from its users. Signed-off-by: Tejun Heo Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9..12493c5485e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1008,8 +1008,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) - __aligned(PAGE_SIZE); + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ void syscall_init(void) -- cgit v1.2.3 From bdf977b37418cdf8a2252504779a7e12a09b7575 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:12:19 +0900 Subject: x86, percpu: Collect hot percpu variables into one cacheline On x86_64, percpu variables current_task and kernel_stack are used for get_current() and current_thread_info() respectively and thus are often used close to each other. Move the definition of current_task to kernel/cpu/common.c right above the kernel_stack definition and align it to a cacheline so that they always fall into the same cacheline. Two percpu variables defined there together - irq_stack_ptr and irq_count - are also pretty hot and will benefit from sharing the cacheline. For consistency, the current_task definition for x86_32 is also moved to kernel/cpu/common.c. Putting current_task and kernel_stack into the same cacheline was suggested by Linus Torvalds. Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 15 +++++++++++++-- arch/x86/kernel/process_32.c | 3 --- arch/x86/kernel/process_64.c | 3 --- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12493c5485e..1bd88ed978b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -DEFINE_PER_CPU(char *, irq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; +/* + * The following four percpu variables are hot. Align current_task to + * cacheline size such that all four fall in the same cacheline.
+ */ +DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = + &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); +DEFINE_PER_CPU(char *, irq_stack_ptr) = + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; + DEFINE_PER_CPU(unsigned int, irq_count) = -1; /* @@ -1041,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); #else /* CONFIG_X86_64 */ +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, stack_canary); #endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984a..daa4107be3b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -61,9 +61,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9..c4c675d5ba1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -55,9 +55,6 @@ asmlinkage extern void ret_from_fork(void); -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - DEFINE_PER_CPU(unsigned long, old_rsp); static DEFINE_PER_CPU(unsigned char, is_idle); -- cgit v1.2.3 From c7bd0414d681706a32105895cae20fb9090db52e Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Thu, 23 Jul 2009 20:56:27 +0200 Subject: x86: Simplify the Makefile in a minor way through use of cc-ifversion Signed-off-by: Frans Pop Acked-by: Sam Ravnborg Reviewed-by: WANG Cong LKML-Reference: <200907232056.28635.elendil@planet.nl> Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659c41b..1f3851a626d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -32,8 +32,8 @@ ifeq ($(CONFIG_X86_32),y) # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: - KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ echo $(call cc-option,-fno-unit-at-a-time); fi ;) + KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \ $(call cc-option,-fno-unit-at-a-time)) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/x86/Makefile_32.cpu -- cgit v1.2.3 From 54a0bf3c2cad3fd118ea725f26a493aece6ea01d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 4 Aug 2009 15:52:38 +0200 Subject: Revert "x86: oprofile/op_model_amd.c set return values for op_amd_handle_ibs()" This reverts commit 21e70878215f620fe99ea7d7c74bc641aeec932f. Instead, the patch Andrew posted at the same time will be applied.
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 827beecb67a..37d19c768d5 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -195,7 +195,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 0; + return 1; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -277,10 +277,7 @@ static void op_amd_stop_ibs(void) #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - return 0; -} + struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.2.3 From 4680e64a88c4ce2c4e736dade99233e3def13fa7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 23 Jun 2009 12:36:08 -0700 Subject: arch/x86/oprofile/op_model_amd.c: fix op_amd_handle_ibs() return type arch/x86/oprofile/op_model_amd.c: In function 'op_amd_handle_ibs': arch/x86/oprofile/op_model_amd.c:217: warning: no return statement in function returning non-void Fix this by making op_amd_handle_ibs() return void. Cc: Robert Richter Signed-off-by: Andrew Morton Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 37d19c768d5..39686c29f03 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -187,7 +187,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, #ifdef CONFIG_OPROFILE_IBS -static inline int +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { @@ -195,7 +195,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, struct op_entry entry; if (!has_ibs) - return 1; + return; if (ibs_config.fetch_enabled) { rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); @@ -241,8 +241,6 @@ op_amd_handle_ibs(struct pt_regs * const regs, wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } - - return 1; } static inline void op_amd_start_ibs(void) @@ -276,7 +274,7 @@ static void op_amd_stop_ibs(void) #else -static inline int op_amd_handle_ibs(struct pt_regs * const regs, +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } -- cgit v1.2.3 From 2977fb3ffc8493a2f4f0a362e8660a6cde9f1bb9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 1 Aug 2009 11:47:59 +0400 Subject: x86, ioapic: Introduce for_each_irq_pin() helper This allows us to save a few lines of code. Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org LKML-Reference: <20090801075435.597863129@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 43 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7e92a9212fd..ffd8fdfcbe4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,8 @@ #include #define __apicdebuginit(type) static type __init +#define for_each_irq_pin(entry, head) \ + for (entry = head; entry; entry = entry->next) /* * Is the SiS APIC rmw bug present ?
@@ -410,7 +412,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; @@ -490,22 +492,21 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list **entryp, *entry; + struct irq_pin_list **last, *entry; - for (entryp = &cfg->irq_2_pin; - *entryp != NULL; - entryp = &(*entryp)->next) { - entry = *entryp; - /* not again, please */ + /* don't allow duplicates */ + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) return; + last = &entry->next; } entry = get_one_free_irq_2_pin(node); entry->apic = apic; entry->pin = pin; - *entryp = entry; + *last = entry; } /* @@ -517,7 +518,7 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, { struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -537,7 +538,7 @@ static void io_apic_modify_irq(struct irq_cfg *cfg, int pin; struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin * 2); @@ -1669,12 +1670,8 @@ __apicdebuginit(void) print_IO_APIC(void) if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } printk("\n"); } @@ -2227,7 +2224,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq struct irq_pin_list *entry; u8 vector = cfg->vector; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; apic = entry->apic; @@ -2556,20 +2553,10 @@ static void ack_apic_level(unsigned int irq) #ifdef CONFIG_INTR_REMAP static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { - int apic, pin; struct irq_pin_list *entry; - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } + for_each_irq_pin(entry, cfg->irq_2_pin) + io_apic_eoi(entry->apic, entry->pin); } static void -- cgit v1.2.3 From a7428cd2ef77734465e36bceb43290e37e2a97c6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 1 Aug 2009 11:48:00 +0400 Subject: x86, ioapic: Throw BUG instead of NULL dereference Instead of plain NULL deref we better throw error message with a backtrace. Actually we need more gracious error handling here. Meanwhile leave it as is. 
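One small possible tightening of the error path added below: BUG_ON(1) is just an unconditional BUG(), so the hunk could be written slightly more directly (equivalent sketch, same behaviour):

entry = get_one_free_irq_2_pin(node);
if (!entry) {
	printk(KERN_ERR "can not alloc irq_pin_list\n");
	BUG();		/* same effect as BUG_ON(1) */
}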
Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org LKML-Reference: <20090801075435.769301745@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ffd8fdfcbe4..2a145d3a837 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -503,6 +503,10 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin } entry = get_one_free_irq_2_pin(node); + if (!entry) { + printk(KERN_ERR "can not alloc irq_pin_list\n"); + BUG_ON(1); + } entry->apic = apic; entry->pin = pin; -- cgit v1.2.3 From 9910887af84e33ba98fd6792029470ae80166208 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 23 Jul 2009 00:52:59 +0400 Subject: x86, apic: Drop redundant bit assignment cpu_has_apic has already checked the boot_cpu_data X86_FEATURE_APIC bit for being clear when this condition triggers, so there is no need to clear the bit a second time. Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" LKML-Reference: <20090722205259.GE15805@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a1c2830ec6..0b021c56e82 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1651,7 +1651,6 @@ int __init APIC_init_uniprocessor(void) APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } #endif -- cgit v1.2.3 From ce69a784504222c3ab6f1b3c357d09ec5772127a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 20 Jul 2009 15:24:17 +0300 Subject: x86/apic: Enable x2APIC without interrupt remapping under KVM KVM would like to provide an x2APIC interface to a guest without emulating an interrupt remapping device. The reason KVM prefers the guest to use x2APIC is that the x2APIC interface is better virtualizable and provides better performance than the mmio xAPIC interface: - msr exits are faster than mmio (no page table walk, emulation) - no need to read back the ICR to look at the busy bit - one 64-bit ICR write instead of two 32-bit writes - shared code with the Hyper-V paravirt interface The included patch changes the x2APIC enabling logic to enable it even if IR initialization failed, provided the kernel runs under KVM and no APIC ID is greater than 255 (if there is one, the spec requires the BIOS to move to x2APIC mode before starting an OS).
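To make the ICR bullet points concrete: with the xAPIC an IPI takes two MMIO stores plus a poll of the delivery-status (busy) bit, each of which can trap to the hypervisor, while the x2APIC collapses the whole operation into one WRMSR. An illustrative sketch (not kernel code; register offsets and the 0x830 ICR MSR are per the Intel SDM):

#include <linux/types.h>
#include <asm/io.h>		/* readl/writel */
#include <asm/msr.h>		/* wrmsrl */
#include <asm/processor.h>	/* cpu_relax */

/* xAPIC (MMIO): poll busy, write destination, then trigger the send. */
static void xapic_send_ipi(void __iomem *apic_base, u32 apic_id, u32 vector)
{
	while (readl(apic_base + 0x300) & (1 << 12))	/* ICR busy bit */
		cpu_relax();
	writel(apic_id << 24, apic_base + 0x310);	/* ICR2: destination */
	writel(vector, apic_base + 0x300);		/* ICR: starts the IPI */
}

/* x2APIC (MSR): one 64-bit write, and no busy bit to read back. */
static void x2apic_send_ipi(u32 apic_id, u32 vector)
{
	wrmsrl(0x830, ((u64)apic_id << 32) | vector);
}

Under KVM each MMIO access above is a full emulation exit, so halving the accesses and dropping the ICR read-back is a direct win.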
-v2: fix build -v3: fix bug causing compiler warning Signed-off-by: Gleb Natapov Acked-by: Suresh Siddha Cc: Sheng Yang Cc: "avi@redhat.com" LKML-Reference: <20090720122417.GR5638@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 7 ++++ arch/x86/kernel/apic/apic.c | 83 +++++++++++++++++++++++------------------ arch/x86/kernel/apic/probe_64.c | 6 +-- 3 files changed, 56 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bb7d4792584..586b7adb8e5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -183,6 +183,10 @@ static inline int x2apic_enabled(void) } #define x2apic_supported() (cpu_has_x2apic) +static inline void x2apic_force_phys(void) +{ + x2apic_phys = 1; +} #else static inline void check_x2apic(void) { @@ -194,6 +198,9 @@ static inline int x2apic_enabled(void) { return 0; } +static inline void x2apic_force_phys(void) +{ +} #define x2apic_preenabled 0 #define x2apic_supported() 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0b021c56e82..de039fcdd05 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -49,6 +49,7 @@ #include #include #include +#include unsigned int num_processors; @@ -1361,52 +1362,76 @@ void enable_x2apic(void) } #endif /* CONFIG_X86_X2APIC */ -void __init enable_IR_x2apic(void) +int __init enable_IR(void) { #ifdef CONFIG_INTR_REMAP int ret; - unsigned long flags; - struct IO_APIC_route_entry **ioapic_entries = NULL; ret = dmar_table_init(); if (ret) { pr_debug("dmar_table_init() failed with %d:\n", ret); - goto ir_failed; + return 0; } if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - goto ir_failed; + return 0; } - if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return; + return 0; } + if (enable_intr_remapping(x2apic_supported())) + return 0; + + pr_info("Enabled Interrupt-remapping\n"); + + return 1; + +#endif + return 0; +} + +void __init enable_IR_x2apic(void) +{ + unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; + int ret, x2apic_enabled = 0; + ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { - pr_info("Allocate ioapic_entries failed: %d\n", ret); - goto end; + pr_err("Allocate ioapic_entries failed\n"); + goto out; } ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); - goto end; + goto out; } local_irq_save(flags); - mask_IO_APIC_setup(ioapic_entries); mask_8259A(); + mask_IO_APIC_setup(ioapic_entries); - ret = enable_intr_remapping(x2apic_supported()); - if (ret) - goto end_restore; + ret = enable_IR(); + if (!ret) { + /* IR is required if there is APIC ID > 255 even when running + * under KVM + */ + if (max_physical_apicid > 255 || !kvm_para_available()) + goto nox2apic; + /* + * without IR all CPUs can be addressed by IOAPIC/MSI + * only in physical mode + */ + x2apic_force_phys(); + } - pr_info("Enabled Interrupt-remapping\n"); + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { x2apic_mode = 1; @@ -1414,41 +1439,25 @@ void __init enable_IR_x2apic(void) pr_info("Enabled x2apic\n"); } -end_restore: - if (ret) - /* - * IR enabling failed - */ +nox2apic: + if (!ret) /* IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - unmask_8259A(); local_irq_restore(flags); -end: +out: if (ioapic_entries) free_ioapic_entries(ioapic_entries); - if (!ret) + if 
(x2apic_enabled) return; -ir_failed: if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); + panic("x2apic: enabled by BIOS but kernel init failed."); else if (cpu_has_x2apic) - pr_info("Not enabling x2apic,Intr-remapping\n"); -#else - if (!cpu_has_x2apic) - return; - - if (x2apic_preenabled) - panic("x2apic enabled prior OS handover," - " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); -#endif - - return; + pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); } - #ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b8..f3b1037076e 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,11 +50,11 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic_mode && (apic != &apic_x2apic_phys && + if (x2apic_mode #ifdef CONFIG_X86_UV - apic != &apic_x2apic_uv_x && + && apic != &apic_x2apic_uv_x #endif - apic != &apic_x2apic_cluster)) { + ) { if (x2apic_phys) apic = &apic_x2apic_phys; else -- cgit v1.2.3 From 1e2c8d830a74c24ccb25601bd89dcd0dbcb4e9f5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 4 Aug 2009 10:52:47 +0000 Subject: ar7: add fixed PHY support for the two on-board cpmac This patch adds fixed PHY support for the two on-chip cpmac Ethernet adapters. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/mips/ar7/platform.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 54224496178..c4737ce6d29 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -209,6 +211,12 @@ static struct physmap_flash_data physmap_flash_data = { .width = 2, }; +static struct fixed_phy_status fixed_phy_status __initdata = { + .link = 1, + .speed = 100, + .duplex = 1, +}; + static struct plat_cpmac_data cpmac_low_data = { .reset_bit = 17, .power_bit = 20, @@ -530,6 +538,9 @@ static int __init ar7_register_devices(void) } if (ar7_has_high_cpmac()) { + res = fixed_phy_add(PHY_POLL, cpmac_high.id, &fixed_phy_status); + if (res && res != -ENODEV) + return res; cpmac_get_mac(1, cpmac_high_data.dev_addr); res = platform_device_register(&cpmac_high); if (res) @@ -538,6 +549,10 @@ static int __init ar7_register_devices(void) cpmac_low_data.phy_mask = 0xffffffff; } + res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); + if (res && res != -ENODEV) + return res; + cpmac_get_mac(0, cpmac_low_data.dev_addr); res = platform_device_register(&cpmac_low); if (res) -- cgit v1.2.3 From 49c794e94649020248e37b78db16cd25bad38b4f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Aug 2009 07:28:28 +0000 Subject: net: implement a SO_PROTOCOL getsockoption Similar to SO_TYPE returning the socket type, SO_PROTOCOL allows to retrieve the protocol used with a given socket. I am not quite sure why we have that-many copies of socket.h, and why the values are not the same on all arches either, but for where hex numbers dominate, I use 0x1029 for SO_PROTOCOL as that seems to be the next free unused number across a bunch of operating systems, or so Google results make me want to believe. SO_PROTOCOL for others just uses the next free Linux number, 38. Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 2 ++ arch/avr32/include/asm/socket.h | 2 ++ arch/cris/include/asm/socket.h | 2 ++ arch/frv/include/asm/socket.h | 2 ++ arch/h8300/include/asm/socket.h | 2 ++ arch/ia64/include/asm/socket.h | 2 ++ arch/m32r/include/asm/socket.h | 2 ++ arch/m68k/include/asm/socket.h | 2 ++ arch/microblaze/include/asm/socket.h | 2 ++ arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 2 ++ arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 2 ++ arch/s390/include/asm/socket.h | 2 ++ arch/sparc/include/asm/socket.h | 2 ++ arch/x86/include/asm/socket.h | 2 ++ arch/xtensa/include/asm/socket.h | 2 ++ 18 files changed, 33 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 3641ec1452f..2f8b4d37774 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -32,6 +32,7 @@ #define SO_RCVTIMEO 0x1012 #define SO_SNDTIMEO 0x1013 #define SO_ACCEPTCONN 0x1014 +#define SO_PROTOCOL 0x1028 /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 537de4e0ef5..7f47454ffbf 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 04c86061970..6af2866a4f0 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index d5cf7400540..f3859fb0990 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -59,6 +59,8 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 57c3d4054e8..8dab3486ffa 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -57,5 +57,7 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index 602518a70a1..ba770d09cd6 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 745421225ec..091cd9d47d0 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -66,4 +66,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index be7ed589af5..d36f5928fb7 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h 
b/arch/m68k/include/asm/socket.h index ca87f938b03..060cb7ed024 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/microblaze/include/asm/socket.h b/arch/microblaze/include/asm/socket.h index 82593686031..96bf8bfa935 100644 --- a/arch/microblaze/include/asm/socket.h +++ b/arch/microblaze/include/asm/socket.h @@ -66,4 +66,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_MICROBLAZE_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 2abca178016..289ce5f5f2a 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -42,6 +42,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_SNDTIMEO 0x1005 /* send timeout */ #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ACCEPTCONN 0x1009 +#define SO_PROTOCOL 0x1028 /* protocol type */ /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index fb5daf438ec..19d7cf709b7 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index 885472bf7b7..a658b09df62 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -24,6 +24,7 @@ #define SO_RCVTIMEO 0x1006 #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 +#define SO_PROTOCOL 0x1028 #define SO_PEERNAME 0x2000 #define SO_NO_CHECK 0x400b diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 1e5cfad0e3f..609049d7117 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -64,4 +64,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 02330c50241..65baa9a83ab 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -65,4 +65,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 982a12f959f..9cbbfafd053 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -29,6 +29,8 @@ #define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 +#define SO_PROTOCOL 0x1028 + /* Linux specific, keep the same. 
*/ #define SO_NO_CHECK 0x000b diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index ca8bf2cd0ba..1077d2535a3 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index dd1a7a4a1ce..e47f172142f 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -68,4 +68,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _XTENSA_SOCKET_H */ -- cgit v1.2.3 From 0d6038ee76f2e06b79d0465807f67e86bf4025de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Aug 2009 07:28:29 +0000 Subject: net: implement a SO_DOMAIN getsockoption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sockopt goes in line with SO_TYPE and SO_PROTOCOL. It makes it possible for userspace programs to pass around file descriptors — I am referring to arguments-to-functions, but it may even work for the fd passing over UNIX sockets — without needing to also pass the auxiliary information (PF_INET6/IPPROTO_TCP). Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 1 + arch/avr32/include/asm/socket.h | 1 + arch/cris/include/asm/socket.h | 1 + arch/frv/include/asm/socket.h | 1 + arch/h8300/include/asm/socket.h | 1 + arch/ia64/include/asm/socket.h | 1 + arch/m32r/include/asm/socket.h | 1 + arch/m68k/include/asm/socket.h | 1 + arch/microblaze/include/asm/socket.h | 1 + arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 1 + arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 1 + arch/s390/include/asm/socket.h | 1 + arch/sparc/include/asm/socket.h | 1 + arch/x86/include/asm/socket.h | 1 + arch/xtensa/include/asm/socket.h | 1 + 18 files changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 2f8b4d37774..26773e3246e 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -33,6 +33,7 @@ #define SO_SNDTIMEO 0x1013 #define SO_ACCEPTCONN 0x1014 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 7f47454ffbf..92ac61d294f 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 6af2866a4f0..fe863f9794d 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index f3859fb0990..45ec49bdb7b 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -60,6 +60,7 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 
8dab3486ffa..2dea726095c 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -58,6 +58,7 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index ba770d09cd6..1547f01c8e2 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 091cd9d47d0..0b0d5ff062e 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -67,5 +67,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index d36f5928fb7..3390a864f22 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index 060cb7ed024..eee01cce921 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/microblaze/include/asm/socket.h b/arch/microblaze/include/asm/socket.h index 96bf8bfa935..7361ae7cfcd 100644 --- a/arch/microblaze/include/asm/socket.h +++ b/arch/microblaze/include/asm/socket.h @@ -67,5 +67,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_MICROBLAZE_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 289ce5f5f2a..ae05accd9fe 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -43,6 +43,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ACCEPTCONN 0x1009 #define SO_PROTOCOL 0x1028 /* protocol type */ +#define SO_DOMAIN 0x1029 /* domain/socket family */ /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 19d7cf709b7..4df75af29d7 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index a658b09df62..960b1e5d8e1 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -25,6 +25,7 @@ #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 #define SO_PEERNAME 0x2000 #define SO_NO_CHECK 0x400b diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 609049d7117..3ab8b3e6feb 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -65,5 +65,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 65baa9a83ab..e42df89a0b8 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -66,5 +66,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 9cbbfafd053..3a5ae3d1208 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -30,6 +30,7 @@ #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 /* Linux specific, keep the same. */ diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index 1077d2535a3..b2a8c74f2d0 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index e47f172142f..beb3a6bdb61 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -69,5 +69,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _XTENSA_SOCKET_H */ -- cgit v1.2.3 From 07868b086cca784f4b532fc2ab574ec3a73b468a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 29 Jul 2009 03:33:51 +0200 Subject: tracing/function-graph-tracer: Drop the useless nmi protection The function graph tracer used to have a protection against NMIs while tracing a function entry. This is useless now: the tracer is reentrant and the ring buffer supports NMI tracing, so we can drop this protection.
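For intuition, a minimal user-space sketch of why a reentrant, lock-less buffer tolerates NMIs (a toy model with invented names, not the actual ftrace or ring-buffer code): each writer claims a distinct slot with a single atomic operation, so a writer interrupted by an NMI-context writer cannot corrupt the other's record.

#include <stdatomic.h>
#include <stdio.h>

#define SLOTS 1024

static _Atomic unsigned long head;   /* next free slot */
static unsigned long buf[SLOTS];     /* toy trace buffer */

/* Reentrancy-safe: atomic_fetch_add hands each caller its own slot,
 * even if this function is re-entered from NMI context. */
static void record(unsigned long ip)
{
	unsigned long idx = atomic_fetch_add(&head, 1);

	buf[idx % SLOTS] = ip;
}

int main(void)
{
	record(0x1234);
	printf("slot 0: %#lx\n", buf[0]);
	return 0;
}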
Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt --- arch/x86/kernel/ftrace.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea3b9f..8e9663413b7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long return_hooker = (unsigned long) &return_to_handler; - /* Nmi's are currently unsupported */ - if (unlikely(in_nmi())) - return; - if (unlikely(atomic_read(&current->tracing_graph_pause))) return; -- cgit v1.2.3 From c12abc012e18b362204345c323536f228d65c4db Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 7 Aug 2009 09:59:47 +0300 Subject: ARM: OMAP: McBSP: Fix ASoC on OMAP1510 by fixing API of omap_mcbsp_start/stop Simultaneous audio playback and capture on OMAP1510 can cause the second stream to stall if there is enough delay between the startup of the two audio streams. The current implementation of omap_mcbsp_start starts both transmitter and receiver at the same time, and it is called only for the first audio stream started from the OMAP McBSP based ASoC DAI driver. Since DMA request lines on OMAP1510 are edge sensitive, the DMA request is missed if no DMA transfer is set up at the time the first word after McBSP startup is transmitted. The problem hasn't been noticed before since later OMAPs use level sensitive DMA request lines. Fix the problem by changing the API of omap_mcbsp_start and omap_mcbsp_stop to allow starting and stopping the McBSP transmitter and receiver individually, and then call those functions individually for both the audio playback and capture streams. This ensures that the DMA transfer is set up before the transmitter or receiver is started. Thanks to Janusz Krzysztofik for the detailed problem analysis and to Peter Ujfalusi for info about the DMA request line behavior differences between the OMAP generations. Reported-and-tested-by: Janusz Krzysztofik Signed-off-by: Jarkko Nikula Acked-by: Tony Lindgren Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown --- arch/arm/plat-omap/include/mach/mcbsp.h | 4 +-- arch/arm/plat-omap/mcbsp.c | 50 +++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index bb154ea7676..57249bb1e9b 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -387,8 +387,8 @@ void omap_mcbsp_register_board_cfg(struct omap_mcbsp_platform_data *config, void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg * config); int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); -void omap_mcbsp_start(unsigned int id); -void omap_mcbsp_stop(unsigned int id); +void omap_mcbsp_start(unsigned int id, int tx, int rx); +void omap_mcbsp_stop(unsigned int id, int tx, int rx); void omap_mcbsp_xmit_word(unsigned int id, u32 word); u32 omap_mcbsp_recv_word(unsigned int id); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index efa0e0111f3..a3d2313460b 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -328,14 +328,15 @@ void omap_mcbsp_free(unsigned int id) EXPORT_SYMBOL(omap_mcbsp_free); /* - * Here we start the McBSP, by enabling the sample - * generator, both transmitter and receivers, - * and the frame sync. + * Here we start the McBSP, by enabling transmitter, receiver or both.
+ * If no transmitter or receiver is active prior calling, then sample-rate + * generator and frame sync are started. */ -void omap_mcbsp_start(unsigned int id) +void omap_mcbsp_start(unsigned int id, int tx, int rx) { struct omap_mcbsp *mcbsp; void __iomem *io_base; + int idle; u16 w; if (!omap_mcbsp_check_valid_id(id)) { @@ -348,32 +349,40 @@ void omap_mcbsp_start(unsigned int id) mcbsp->rx_word_length = (OMAP_MCBSP_READ(io_base, RCR1) >> 5) & 0x7; mcbsp->tx_word_length = (OMAP_MCBSP_READ(io_base, XCR1) >> 5) & 0x7; - /* Start the sample generator */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 6)); + idle = !((OMAP_MCBSP_READ(io_base, SPCR2) | + OMAP_MCBSP_READ(io_base, SPCR1)) & 1); + + if (idle) { + /* Start the sample generator */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 6)); + } /* Enable transmitter and receiver */ w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | 1); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | (tx & 1)); w = OMAP_MCBSP_READ(io_base, SPCR1); - OMAP_MCBSP_WRITE(io_base, SPCR1, w | 1); + OMAP_MCBSP_WRITE(io_base, SPCR1, w | (rx & 1)); udelay(100); - /* Start frame sync */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 7)); + if (idle) { + /* Start frame sync */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 7)); + } /* Dump McBSP Regs */ omap_mcbsp_dump_reg(id); } EXPORT_SYMBOL(omap_mcbsp_start); -void omap_mcbsp_stop(unsigned int id) +void omap_mcbsp_stop(unsigned int id, int tx, int rx) { struct omap_mcbsp *mcbsp; void __iomem *io_base; + int idle; u16 w; if (!omap_mcbsp_check_valid_id(id)) { @@ -386,15 +395,20 @@ void omap_mcbsp_stop(unsigned int id) /* Reset transmitter */ w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1)); + OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(tx & 1)); /* Reset receiver */ w = OMAP_MCBSP_READ(io_base, SPCR1); - OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~(1)); + OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~(rx & 1)); - /* Reset the sample rate generator */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1 << 6)); + idle = !((OMAP_MCBSP_READ(io_base, SPCR2) | + OMAP_MCBSP_READ(io_base, SPCR1)) & 1); + + if (idle) { + /* Reset the sample rate generator */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1 << 6)); + } } EXPORT_SYMBOL(omap_mcbsp_stop); -- cgit v1.2.3 From f3d1915a8623b9248572d3ee44e19a80b7a3520b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 6 Aug 2009 00:09:31 +0400 Subject: x86, ioapic: Panic on irq-pin binding only if needed Most of the time we have to panic when irq-pin allocation fails, but for PCI interrupts this is not the case: we can continue operating even if the irq-pin allocation failed. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090805200931.GB5319@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2a145d3a837..2999f3dd588 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -490,7 +490,8 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs.
*/ -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int +add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list **last, *entry; @@ -498,19 +499,27 @@ static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin last = &cfg->irq_2_pin; for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) - return; + return 0; last = &entry->next; } entry = get_one_free_irq_2_pin(node); if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list\n"); - BUG_ON(1); + printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); + return -ENOMEM; } entry->apic = apic; entry->pin = pin; *last = entry; + return 0; +} + +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +{ + if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) + panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* @@ -3843,7 +3852,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_node(cfg, node, ioapic, pin); + if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { + printk(KERN_INFO "can not add pin %d for irq %d\n", + pin, irq); + return 0; + } } setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); -- cgit v1.2.3 From 1e5de18278e6862f4198412b5059a03770fa816a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 19 Jul 2009 00:12:20 +0900 Subject: x86: Introduce GDT_ENTRY_INIT() GDT_ENTRY_INIT is a static initializer for desc_struct. We already have a similar macro, GDT_ENTRY(), but it is a static initializer for u64 and cannot be used for desc_struct. Signed-off-by: Akinobu Mita LKML-Reference: <20090718151219.GD11294@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc_defs.h | 6 ++++++ arch/x86/include/asm/lguest.h | 5 +++-- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/cpu/common.c | 40 +++++++++++++++++------------------ 5 files changed, 31 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a6adefa28b9..9d6684849fd 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -34,6 +34,12 @@ struct desc_struct { }; } __attribute__((packed)); +#define GDT_ENTRY_INIT(flags, base, limit) { { { \ .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \ .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \ ((limit) & 0xf0000) | ((base) & 0xff000000), \ } } } + enum { GATE_INTERRUPT = 0xE, GATE_TRAP = 0xF, diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 313389cd50d..94cd69858b1 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -91,8 +91,9 @@ static inline void lguest_set_ts(void) } /* Full 4G segment descriptors, suitable for CS and DS.
*/ -#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } }) -#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } }) +#define FULL_EXEC_SEGMENT \ + ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff)) +#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff)) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index cdc5e0b126a..44efdff3975 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -48,7 +48,7 @@ * head_32 for boot CPU and setup_per_cpu_areas() for others. */ #define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), /* * Initialize the stackprotector canary value. diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index b5e841bd60d..febb2dab254 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; +static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, 0, 0); static const char driver_version[] = "1.16ac"; /* no spaces */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9..8c9bc287f8f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -71,45 +71,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { * TLS descriptors are currently at a different place compared to i386. * Hopefully nobody expects them at a fixed place (Wine?) */ - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), #else - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), + [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), /* * Segments used for calling PnP BIOS have byte granularity. * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. 
*/ /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), + [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), GDT_STACK_CANARY_INIT #endif } }; -- cgit v1.2.3 From 72c4d8530244264317a662de9a55cc47e6c8e9df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 08:47:07 +0200 Subject: x86: Introduce GDT_ENTRY_INIT(), fix APM This crash: [ 0.891983] calling cache_sysfs_init+0x0/0x1ee @ 1 [ 0.897251] initcall cache_sysfs_init+0x0/0x1ee returned 0 after 405 usecs [ 0.904019] calling mce_init_device+0x0/0x242 @ 1 [ 0.909124] initcall mce_init_device+0x0/0x242 returned 0 after 347 usecs [ 0.915815] calling apm_init+0x0/0x38d @ 1 [ 0.919967] apm: BIOS version 1.2 Flags 0x07 (Driver version 1.16ac) [ 0.926813] general protection fault: 0000 [#1] [ 0.927269] last sysfs file: [ 0.927269] Modules linked in: [ 0.927269] [ 0.927269] Pid: 271, comm: kapmd Not tainted (2.6.31-rc3-00100-gd520da1-dirty #311) System Product Name [ 0.927269] EIP: 00c0:[<000082b2>] EFLAGS: 00010002 CPU: 0 [ 0.927269] EIP is at 0x82b2 [ 0.927269] EAX: 0000530e EBX: 00000000 ECX: 00000102 EDX: 00000000 [ 0.927269] ESI: 00000000 EDI: f6a4bf44 EBP: 67890000 ESP: f6a4beec [ 0.927269] DS: 00c8 ES: 0000 FS: 0000 GS: 0000 SS: 0068 [ 0.927269] Process kapmd (pid: 271, ti=f6a4a000 task=f7142280 task.ti=f6a4a000) [ 0.927269] Stack: [ 0.927269] 0000828d 02160000 00b88092 f6a4bf3c c102a63d 00000060 f6a4bf3c f6a4bf44 [ 0.927269] <0> 0000007b 0000007b 00000000 00000000 00000000 00000000 560aae9e 00000000 [ 0.927269] <0> 00000200 f705fd74 00000000 c102af70 f6a4bf60 c102a6ec 0000530e 00000000 [ 0.927269] Call Trace: [ 0.927269] [] ? __apm_bios_call_simple+0x7d/0x110 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? apm_bios_call_simple+0x1c/0x50 [ 0.927269] [] ? apm+0x485/0x6a0 [ 0.927269] [] ? finish_task_switch+0x2a/0xb0 [ 0.927269] [] ? schedule+0x31e/0x480 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? apm+0x0/0x6a0 [ 0.927269] [] ? kthread+0x74/0x80 [ 0.927269] [] ? kthread+0x0/0x80 [ 0.927269] [] ? kernel_thread_helper+0x7/0x10 [ 0.927269] Code: Bad EIP value. 
[ 0.927269] EIP: [<000082b2>] 0x82b2 SS:ESP 0068:f6a4beec [ 0.927269] ---[ end trace a7919e7f17c0a725 ]--- [ 0.927269] Kernel panic - not syncing: Fatal exception [ 0.927269] Pid: 271, comm: kapmd Tainted: G D 2.6.31-rc3-00100-gd520da1-dirty #311 Is caused by an incorrect GDT_ENTRY_INIT() conversion in the apm code, as noticed by hpa. Reported-by: Ingo Molnar Noticed-by: "H. Peter Anvin" Signed-off-by: Akinobu Mita LKML-Reference: <20090808094905.GA2954@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c9bc287f8f..b5764a26964 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -106,7 +106,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { /* 16-bit code */ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), + [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), -- cgit v1.2.3 From 4c711576b90cc36c13b94816a953a8de6a53d03c Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 6 Aug 2009 15:58:12 -0700 Subject: x86, 32-bit: Use generic sys_pipe() As suggested by Al, it's better to use the generic sys_pipe() for ia32. Signed-off-by: WANG Cong Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index e590261ba05..ba331bfd111 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -537,7 +537,7 @@ ia32_sys_call_table: .quad sys_mkdir .quad sys_rmdir /* 40 */ .quad sys_dup - .quad sys32_pipe + .quad sys_pipe .quad compat_sys_times .quad quiet_ni_syscall /* old prof syscall holder */ .quad sys_brk /* 45 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 085a8c35f14..9f552719882 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -189,20 +189,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, return sys_mprotect(start, len, prot); } -asmlinkage long sys32_pipe(int __user *fd) -{ - int retval; - int fds[2]; - - retval = do_pipe_flags(fds, 0); - if (retval) - goto out; - if (copy_to_user(fd, fds, sizeof(fds))) - retval = -EFAULT; -out: - return retval; -} - asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, struct sigaction32 __user *oact, unsigned int sigsetsize) -- cgit v1.2.3 From 30dd568c912602b7dbd609a45d053e01b13422bb Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 21 Jul 2009 15:56:48 +0200 Subject: x86, perf_counter, bts: Add BTS support to perfcounters Implement a performance counter with: attr.type = PERF_TYPE_HARDWARE attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS attr.sample_period = 1 Using branch trace store (BTS) on x86 hardware, if available. 
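A hedged user-space sketch of that configuration follows (field and constant names are from the perf_counter ABI of this series; the open call is left as a comment because it is a raw syscall with no libc wrapper, so the exact invocation is an assumption):

#include <string.h>
#include <linux/perf_counter.h>

static void setup_bts_attr(struct perf_counter_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size          = sizeof(*attr);
	attr->type          = PERF_TYPE_HARDWARE;
	attr->config        = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
	attr->sample_period = 1;  /* sample every branch: selects BTS */
	attr->sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
	/* fd = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); */
}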
The from and to address for each branch can be sampled using: PERF_SAMPLE_IP for the from address PERF_SAMPLE_ADDR for the to address [ v2: address review feedback, fix bugs ] Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 10 ++ arch/x86/kernel/cpu/perf_counter.c | 325 +++++++++++++++++++++++++++++++++++- 2 files changed, 329 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e401589..e7b7c938ae2 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,16 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed counter range, since lower + * values are used by actual fixed counters and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f90095..b237c181aa4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -6,6 +6,7 @@ * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, * * For licencing details see kernel-base/COPYING */ @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,12 +29,52 @@ static u64 perf_counter_mask __read_mostly; +/* The maximal number of PEBS counters: */ +#define MAX_PEBS_COUNTERS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. 
+ */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; +}; + struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; + struct debug_store *ds; }; /* @@ -57,6 +99,8 @@ struct x86_pmu { u64 counter_mask; u64 max_period; u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); }; static struct x86_pmu x86_pmu __read_mostly; @@ -576,6 +620,9 @@ x86_perf_counter_update(struct perf_counter *counter, u64 prev_raw_count, new_raw_count; s64 delta; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * Careful: an NMI might modify the previous counter value. * @@ -659,10 +706,109 @@ static void release_pmc_hardware(void) enable_lapic_nmi_watchdog(); } +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_counters, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_counters, cpu).ds = NULL; + + kfree((void *)(long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return -EOPNOTSUPP; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_counters, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + static void hw_perf_counter_destroy(struct perf_counter *counter) { if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { release_pmc_hardware(); + release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); } } @@ -705,6 +851,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) return 0; } +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= 
X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + /* * Setup the hardware configuration for a given attr_type */ @@ -721,9 +903,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) err = 0; if (!atomic_inc_not_zero(&active_counters)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else + if (atomic_read(&active_counters) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_bts_hardware(); + } + if (!err) atomic_inc(&active_counters); mutex_unlock(&pmc_reserve_mutex); } @@ -801,7 +987,18 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); } static void amd_pmu_disable_all(void) @@ -859,7 +1056,25 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_counter *counter = + cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!counter)) + return; + + intel_pmu_enable_bts(counter->hw.config); + } } static void amd_pmu_enable_all(void) @@ -946,6 +1161,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) static inline void intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc, idx); return; @@ -974,6 +1194,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, s64 period = hwc->sample_period; int err, ret = 0; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * If we are way outside a reasoable range then just skip forward: */ @@ -1056,6 +1279,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_counters).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc, idx); return; @@ -1077,11 +1308,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + if (!x86_pmu.num_counters_fixed) return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(event 
== x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1102,7 +1338,25 @@ static int x86_pmu_enable(struct perf_counter *counter) int idx; idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* + * Try to use BTS for branch tracing. If that is not + * available, try to get a generic counter. + */ + if (unlikely(!cpuc->ds)) + goto try_generic; + + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = 0; + hwc->counter_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { /* * Try to get the fixed counter, if that is already taken * then try to get a generic counter: @@ -1213,6 +1467,45 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } +static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, + struct perf_sample_data *data) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + unsigned long orig_ip = data->regs->ip; + u64 at; + + if (!counter) + return; + + if (!ds) + return; + + for (at = ds->bts_buffer_base; + at < ds->bts_index; + at += sizeof(struct bts_record)) { + struct bts_record *rec = (struct bts_record *)(long)at; + + data->regs->ip = rec->from; + data->addr = rec->to; + + perf_counter_output(counter, 1, data); + } + + ds->bts_index = ds->bts_buffer_base; + + data->regs->ip = orig_ip; + data->addr = 0; + + /* There's new data available. */ + counter->pending_kill = POLL_IN; +} + static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -1237,6 +1530,15 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct perf_sample_data data; + struct pt_regs regs; + + data.regs = &regs; + intel_pmu_drain_bts_buffer(cpuc, &data); + } cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); @@ -1264,6 +1566,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) static void intel_pmu_reset(void) { + struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; unsigned long flags; int idx; @@ -1281,6 +1584,8 @@ static void intel_pmu_reset(void) for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } + if (ds) + ds->bts_index = ds->bts_buffer_base; local_irq_restore(flags); } @@ -1346,6 +1651,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); + intel_pmu_drain_bts_buffer(cpuc, &data); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1547,6 +1853,8 @@ static struct x86_pmu intel_pmu = { * the generic counter period: */ .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, }; static struct x86_pmu amd_pmu = { @@ -1936,3 +2244,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void hw_perf_counter_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} -- cgit v1.2.3 From 9f51e24ee8b5a1595b6a5ac0c2be278a16488e75 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 9 Aug 2009 21:54:00 +0200 Subject: x86: Use printk_once() Signed-off-by: Marcin Slusarz Cc: "H.
Peter Anvin" LKML-Reference: <1249847649-11631-6-git-send-email-marcin.slusarz@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 5 ++--- arch/x86/kvm/x86.c | 7 +------ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b09634a515..7d35d0fe232 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) void fixup_irqs(void) { unsigned int irq; - static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { @@ -236,8 +235,8 @@ void fixup_irqs(void) } if (desc->chip->set_affinity) desc->chip->set_affinity(irq, affinity); - else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + else if (desc->action) + printk_once("Cannot set affinity for irq %i\n", irq); } #if 0 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fe5474aec41..0572c90f0c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2261,12 +2261,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu) { - static int reported; - - if (!reported) { - reported = 1; - printk(KERN_WARNING "kvm: emulating exchange as write\n"); - } + printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); #ifndef CONFIG_X86_64 /* guests cmpxchg8b have to be emulated atomically */ if (bytes == 8) { -- cgit v1.2.3 From a8ad568dd8ca122aa8048ea067d3599820d1c1b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Aug 2009 11:53:10 +0900 Subject: dma-ops: Remove flush_write_buffers() in dma-mapping-common.h This moves flush_write_buffers() from asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c. The purpose of this patch is to avoid having to define a NULL flush_write_buffers() on IA64 and SPARC. dma-mapping-common.h is used by X86 and IA64 (and soon SPARC) but only X86 with CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE actually uses flush_write_buffers(). CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE is usable only with kernel/pci-nommu.c (that is, not usable with other X86 IOMMU implementations such as SWIOTLB, VT-d, etc.), so we can safely move flush_write_buffers() from asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c.
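For context, a simplified sketch of the dispatch in dma-mapping-common.h after this change (not the exact header text; debug hooks and other details are omitted): the generic helper only calls through the per-architecture ops, so any write-buffer flushing is the backend's business and can live entirely in the x86 nommu implementation.

/* Simplified sketch, not the verbatim header: */
static inline void dma_sync_single_for_device(struct device *dev,
					      dma_addr_t addr, size_t size,
					      enum dma_data_direction dir)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	if (ops->sync_single_for_device)
		ops->sync_single_for_device(dev, addr, size, dir);
	/* no unconditional flush_write_buffers() here any more */
}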
The further discussion is: http://lkml.org/lkml/2009/6/28/104 Signed-off-by: Arnd Bergmann Acked-by: FUJITA Tomonori Cc: davem@davemloft.net Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c0a4222bf62..a3933d4330c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static void nommu_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + + +static void nommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = nommu_free_coherent, + .map_sg = nommu_map_sg, + .map_page = nommu_map_page, + .sync_single_for_device = nommu_sync_single_for_device, + .sync_sg_for_device = nommu_sync_sg_for_device, + .is_phys = 1, }; void __init no_iommu_init(void) -- cgit v1.2.3 From be02ff9940c0106dea1470462401a07c5d52e086 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:11 +0900 Subject: IA64: Remove NULL flush_write_buffers flush_write_buffers() in dma-mapping-common.h was removed so we can remove NULL flush_write_buffers() in IA64. Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: davem@davemloft.net LKML-Reference: <1249872797-1314-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/dma-mapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index f91829de329..8d3c79cd81e 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -44,7 +44,6 @@ static inline void dma_free_coherent(struct device *dev, size_t size, #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define get_dma_ops(dev) platform_dma_get_ops(dev) -#define flush_write_buffers() #include -- cgit v1.2.3 From bc0a14f154069cc4e42ea903c2c2b9984a94e4b7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:12 +0900 Subject: sparc: Use dma_map_ops struct Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. 
Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/dma-mapping.h | 43 ++++++------------------------------ arch/sparc/kernel/dma.c | 16 +++++++++----- arch/sparc/kernel/iommu.c | 16 +++++++++----- arch/sparc/kernel/pci_sun4v.c | 14 +++++++----- 4 files changed, 36 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 204e4bf6443..893f3ecc975 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -13,36 +13,7 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -struct dma_ops { - void *(*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - void (*free_coherent)(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle); - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction); - void (*unmap_page)(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction); - int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction); - void (*unmap_sg)(struct device *dev, struct scatterlist *sg, - int nhwentries, - enum dma_data_direction direction); - void (*sync_single_for_cpu)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - void (*sync_single_for_device)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, - int nelems, - enum dma_data_direction direction); - void (*sync_sg_for_device)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); -}; -extern const struct dma_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) @@ -62,40 +33,40 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, { return dma_ops->map_page(dev, virt_to_page(cpu_addr), (unsigned long)cpu_addr & ~PAGE_MASK, size, - direction); + direction, NULL); } static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction) { - dma_ops->unmap_page(dev, dma_addr, size, direction); + dma_ops->unmap_page(dev, dma_addr, size, direction, NULL); } static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { - return dma_ops->map_page(dev, page, offset, size, direction); + return dma_ops->map_page(dev, page, offset, size, direction, NULL); } static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction) { - dma_ops->unmap_page(dev, dma_address, size, direction); + dma_ops->unmap_page(dev, dma_address, size, direction, NULL); } static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_ops->map_sg(dev, sg, nents, direction); + return dma_ops->map_sg(dev, sg, nents, direction, NULL); } static inline void dma_unmap_sg(struct device *dev, struct 
scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_ops->unmap_sg(dev, sg, nents, direction); + dma_ops->unmap_sg(dev, sg, nents, direction, NULL); } static inline void dma_sync_single_for_cpu(struct device *dev, diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index 524c32f97c5..473a3fc7ab5 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -60,7 +60,8 @@ static void dma32_free_coherent(struct device *dev, size_t size, static dma_addr_t dma32_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) @@ -72,7 +73,8 @@ static dma_addr_t dma32_map_page(struct device *dev, struct page *page, } static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction direction) + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -85,7 +87,8 @@ static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, } static int dma32_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) @@ -95,7 +98,8 @@ static int dma32_map_sg(struct device *dev, struct scatterlist *sg, } void dma32_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) { @@ -161,7 +165,7 @@ static void dma32_sync_sg_for_device(struct device *dev, BUG(); } -static const struct dma_ops dma32_dma_ops = { +static const struct dma_map_ops dma32_dma_ops = { .alloc_coherent = dma32_alloc_coherent, .free_coherent = dma32_free_coherent, .map_page = dma32_map_page, @@ -174,5 +178,5 @@ static const struct dma_ops dma32_dma_ops = { .sync_sg_for_device = dma32_sync_sg_for_device, }; -const struct dma_ops *dma_ops = &dma32_dma_ops; +const struct dma_map_ops *dma_ops = &dma32_dma_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 0aeaefe696b..a9f0ad95518 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -353,7 +353,8 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -474,7 +475,8 @@ do_flush_sync: } static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) + size_t sz, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -520,7 +522,8 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot, ctx; @@ -691,7 +694,8 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) } static void 
dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { unsigned long flags, ctx; struct scatterlist *sg; @@ -822,7 +826,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static const struct dma_ops sun4u_dma_ops = { +static const struct dma_map_ops sun4u_dma_ops = { .alloc_coherent = dma_4u_alloc_coherent, .free_coherent = dma_4u_free_coherent, .map_page = dma_4u_map_page, @@ -833,7 +837,7 @@ static const struct dma_ops sun4u_dma_ops = { .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, }; -const struct dma_ops *dma_ops = &sun4u_dma_ops; +const struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); int dma_supported(struct device *dev, u64 device_mask) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 2485eaa2310..c4f7dce577d 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -232,7 +232,8 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; unsigned long flags, npages, oaddr; @@ -296,7 +297,8 @@ iommu_map_fail: } static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) + size_t sz, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; @@ -336,7 +338,8 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot; @@ -478,7 +481,8 @@ iommu_map_failed: } static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct scatterlist *sg; @@ -535,7 +539,7 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev, /* Nothing to do... */ } -static const struct dma_ops sun4v_dma_ops = { +static const struct dma_map_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, .map_page = dma_4v_map_page, -- cgit v1.2.3 From 02f7a18935eef0e56d9180fc3c35da6aad1ff3bb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:13 +0900 Subject: sparc: Use asm-generic/dma-mapping-common.h Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. 
Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-5-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/dma-mapping.h | 107 +++++------------------------------ arch/sparc/kernel/dma.c | 4 +- arch/sparc/kernel/iommu.c | 4 +- arch/sparc/kernel/pci_sun4v.c | 2 +- 5 files changed, 19 insertions(+), 99 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eab..5f2df99645c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,7 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 + select HAVE_DMA_ATTRS config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 893f3ecc975..34c92264208 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -3,6 +3,7 @@ #include #include +#include #define DMA_ERROR_CODE (~(dma_addr_t)0x0) @@ -13,113 +14,31 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -extern const struct dma_map_ops *dma_ops; +extern struct dma_map_ops *dma_ops; -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - return dma_ops->alloc_coherent(dev, size, dma_handle, flag); -} - -static inline void dma_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); -} - -static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, - size_t size, - enum dma_data_direction direction) +static inline struct dma_map_ops *get_dma_ops(struct device *dev) { - return dma_ops->map_page(dev, virt_to_page(cpu_addr), - (unsigned long)cpu_addr & ~PAGE_MASK, size, - direction, NULL); + return dma_ops; } -static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction) -{ - dma_ops->unmap_page(dev, dma_addr, size, direction, NULL); -} - -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - return dma_ops->map_page(dev, page, offset, size, direction, NULL); -} +#include -static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, - enum dma_data_direction direction) -{ - dma_ops->unmap_page(dev, dma_address, size, direction, NULL); -} - -static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - return dma_ops->map_sg(dev, sg, nents, direction, NULL); -} - -static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - dma_ops->unmap_sg(dev, sg, nents, direction, NULL); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - dma_ops->sync_single_for_cpu(dev, dma_handle, size, direction); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction) -{ - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(dev, dma_handle, size, - direction); -} - -static inline void dma_sync_sg_for_cpu(struct 
device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) { - dma_ops->sync_sg_for_cpu(dev, sg, nelems, direction); -} + struct dma_map_ops *ops = get_dma_ops(dev); -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(dev, sg, nelems, direction); + return ops->alloc_coherent(dev, size, dma_handle, flag); } -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction dir) +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) { - dma_sync_single_for_cpu(dev, dma_handle+offset, size, dir); -} + struct dma_map_ops *ops = get_dma_ops(dev); -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_device(dev, dma_handle+offset, size, dir); + ops->free_coherent(dev, size, cpu_addr, dma_handle); } - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (dma_addr == DMA_ERROR_CODE); diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index 473a3fc7ab5..15820a91817 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -165,7 +165,7 @@ static void dma32_sync_sg_for_device(struct device *dev, BUG(); } -static const struct dma_map_ops dma32_dma_ops = { +static struct dma_map_ops dma32_dma_ops = { .alloc_coherent = dma32_alloc_coherent, .free_coherent = dma32_free_coherent, .map_page = dma32_map_page, @@ -178,5 +178,5 @@ static const struct dma_map_ops dma32_dma_ops = { .sync_sg_for_device = dma32_sync_sg_for_device, }; -const struct dma_map_ops *dma_ops = &dma32_dma_ops; +struct dma_map_ops *dma_ops = &dma32_dma_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index a9f0ad95518..74b289cab55 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -826,7 +826,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static const struct dma_map_ops sun4u_dma_ops = { +static struct dma_map_ops sun4u_dma_ops = { .alloc_coherent = dma_4u_alloc_coherent, .free_coherent = dma_4u_free_coherent, .map_page = dma_4u_map_page, @@ -837,7 +837,7 @@ static const struct dma_map_ops sun4u_dma_ops = { .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, }; -const struct dma_map_ops *dma_ops = &sun4u_dma_ops; +struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); int dma_supported(struct device *dev, u64 device_mask) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index c4f7dce577d..ee800f92792 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -539,7 +539,7 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev, /* Nothing to do... 
*/ } -static const struct dma_map_ops sun4v_dma_ops = { +static struct dma_map_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, .map_page = dma_4v_map_page, -- cgit v1.2.3 From 595cc8560783ea31ed1208dc1f97282a2a5606b7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:14 +0900 Subject: sparc: Remove no-op dma_4v_sync_single_for_cpu and dma_4v_sync_sg_for_cpu Now sparc uses include/asm-generic/dma-mapping-common.h. pci_sun4v.c doesn't need to have no-op dma_4v_sync_single_for_cpu and dma_4v_sync_sg_for_cpu (dma-mapping-common.h does nothing if sync_{single|sg}_for_cpu hook is not defined). So we can remove them safely. Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/kernel/pci_sun4v.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index ee800f92792..23c33ff9c31 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -525,20 +525,6 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, spin_unlock_irqrestore(&iommu->lock, flags); } -static void dma_4v_sync_single_for_cpu(struct device *dev, - dma_addr_t bus_addr, size_t sz, - enum dma_data_direction direction) -{ - /* Nothing to do... */ -} - -static void dma_4v_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction direction) -{ - /* Nothing to do... */ -} - static struct dma_map_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, @@ -546,8 +532,6 @@ static struct dma_map_ops sun4v_dma_ops = { .unmap_page = dma_4v_unmap_page, .map_sg = dma_4v_map_sg, .unmap_sg = dma_4v_unmap_sg, - .sync_single_for_cpu = dma_4v_sync_single_for_cpu, - .sync_sg_for_cpu = dma_4v_sync_sg_for_cpu, }; static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm, -- cgit v1.2.3 From c2c07dbd8742a26ab3f1ee8b82237a060a0d9f61 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:15 +0900 Subject: sparc: Replace sbus_map_single and sbus_unmap_single with sbus_map_page and sbus_unmap_page This is a preparation for using asm-generic/pci-dma-compat.h; SPARC32 has two dma_map_ops structures for pci and sbus (removing arch/sparc/kernel/dma.c, PCI and SBUS DMA accessor). Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. 
Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-7-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/kernel/dma.c | 5 ++--- arch/sparc/kernel/dma.h | 6 +++--- arch/sparc/kernel/ioport.c | 7 +++++-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index 15820a91817..a5d50dac735 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -68,8 +68,7 @@ static dma_addr_t dma32_map_page(struct device *dev, struct page *page, return pci_map_page(to_pci_dev(dev), page, offset, size, (int)direction); #endif - return sbus_map_single(dev, page_address(page) + offset, - size, (int)direction); + return sbus_map_page(dev, page, offset, size, (int)direction); } static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, @@ -83,7 +82,7 @@ static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, return; } #endif - sbus_unmap_single(dev, dma_address, size, (int)direction); + sbus_unmap_page(dev, dma_address, size, (int)direction); } static int dma32_map_sg(struct device *dev, struct scatterlist *sg, diff --git a/arch/sparc/kernel/dma.h b/arch/sparc/kernel/dma.h index f8d8951adb5..680351ee0d4 100644 --- a/arch/sparc/kernel/dma.h +++ b/arch/sparc/kernel/dma.h @@ -1,8 +1,8 @@ void *sbus_alloc_consistent(struct device *dev, long len, u32 *dma_addrp); void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba); -dma_addr_t sbus_map_single(struct device *dev, void *va, - size_t len, int direction); -void sbus_unmap_single(struct device *dev, dma_addr_t ba, +dma_addr_t sbus_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, int direction); +void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, int direction); int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, int direction); diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 87ea0d03d97..39ff1e0c518 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -337,8 +337,11 @@ void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) * CPU view of this memory may be inconsistent with * a device view and explicit flushing is necessary. */ -dma_addr_t sbus_map_single(struct device *dev, void *va, size_t len, int direction) +dma_addr_t sbus_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, int direction) { + void *va = page_address(page) + offset; + /* XXX why are some lengths signed, others unsigned? */ if (len <= 0) { return 0; @@ -350,7 +353,7 @@ dma_addr_t sbus_map_page(struct device *dev, struct page *page, return mmu_get_scsi_one(dev, va, len); } -void sbus_unmap_single(struct device *dev, dma_addr_t ba, size_t n, int direction) +void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, int direction) { mmu_release_scsi_one(dev, ba, n); } -- cgit v1.2.3 From ee664a9252d24ef10317d1bba8fc8f4c6495b36c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:16 +0900 Subject: sparc: Use asm-generic/pci-dma-compat This converts SPARC to use asm-generic/pci-dma-compat instead of the homegrown mechanism. SPARC32 has two dma_map_ops structures for pci and sbus (removing arch/sparc/kernel/dma.c, PCI and SBUS DMA accessor). The global 'dma_ops' is set to sbus_dma_ops and get_dma_ops() returns pci32_dma_ops for pci devices so we can use the appropriate dma mapping operations. 
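For reference, the compat layer simply forwards each legacy pci_* DMA call to the corresponding generic dma_* call; a minimal sketch of that forwarding, closely following asm-generic/pci-dma-compat.h (only pci_map_single shown):

static inline dma_addr_t
pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction)
{
	/* forward onto the generic DMA API, tolerating a NULL hwdev */
	return dma_map_single(hwdev == NULL ? NULL : &hwdev->dev, ptr, size,
			      (enum dma_data_direction)direction);
}

Since dma_map_single() resolves its dma_map_ops via get_dma_ops(), a pci device on SPARC32 is routed to pci32_dma_ops while everything else keeps using sbus_dma_ops.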
Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/dma-mapping.h | 7 +- arch/sparc/include/asm/pci.h | 3 + arch/sparc/include/asm/pci_32.h | 105 ----------------------- arch/sparc/include/asm/pci_64.h | 88 ------------------- arch/sparc/kernel/dma.c | 155 ++------------------------------- arch/sparc/kernel/dma.h | 14 --- arch/sparc/kernel/iommu.c | 4 +- arch/sparc/kernel/ioport.c | 162 ++++++++++++++++------------------- arch/sparc/kernel/pci.c | 2 +- 9 files changed, 96 insertions(+), 444 deletions(-) delete mode 100644 arch/sparc/kernel/dma.h (limited to 'arch') diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 34c92264208..2677818dc78 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -14,10 +14,15 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops *dma_ops, pci32_dma_ops; +extern struct bus_type pci_bus_type; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { +#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) + if (dev->bus == &pci_bus_type) + return &pci32_dma_ops; +#endif return dma_ops; } diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h index 6e14fd17933..d9c031f9910 100644 --- a/arch/sparc/include/asm/pci.h +++ b/arch/sparc/include/asm/pci.h @@ -5,4 +5,7 @@ #else #include #endif + +#include + #endif diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index b41c4c19815..ac0e8369fd9 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -31,42 +31,8 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) */ #define PCI_DMA_BUS_IS_PHYS (0) -#include - struct pci_dev; -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. - * - * References to the memory and mappings assosciated with cpu_addr/dma_addr - * past this call are illegal. - */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed. - */ -extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. 
- */ -extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); - /* pci_unmap_{single,page} is not a nop, thus... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; @@ -81,69 +47,6 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -/* - * Same as above, only with pages instead of mapped addresses. - */ -extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction); -extern void pci_unmap_page(struct pci_dev *hwdev, - dma_addr_t dma_address, size_t size, int direction); - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction); - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, you - * must first perform a pci_dma_sync_for_device, and then the device - * again owns the buffer. - */ -extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); -extern void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single_* but for a scatter-gather list, - * same rules and usage. - */ -extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); -extern void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. 
- */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - return 1; -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, @@ -154,14 +57,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, } #endif -#define PCI_DMA_ERROR_CODE (~(dma_addr_t)0x0) - -static inline int pci_dma_mapping_error(struct pci_dev *pdev, - dma_addr_t dma_addr) -{ - return (dma_addr == PCI_DMA_ERROR_CODE); -} - struct device_node; extern struct device_node *pci_device_to_OF_node(struct pci_dev *pdev); diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 7a1e3566e59..5cc9f6aa549 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -35,37 +35,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) */ #define PCI_DMA_BUS_IS_PHYS (0) -static inline void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(&pdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void pci_free_consistent(struct pci_dev *pdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - return dma_free_coherent(&pdev->dev, size, vaddr, dma_handle); -} - -static inline dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, - size_t size, int direction) -{ - return dma_map_single(&pdev->dev, ptr, size, - (enum dma_data_direction) direction); -} - -static inline void pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - dma_unmap_single(&pdev->dev, dma_addr, size, - (enum dma_data_direction) direction); -} - -#define pci_map_page(dev, page, off, size, dir) \ - pci_map_single(dev, (page_address(page) + (off)), size, dir) -#define pci_unmap_page(dev,addr,sz,dir) \ - pci_unmap_single(dev,addr,sz,dir) - /* pci_unmap_{single,page} is not a nop, thus... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; @@ -80,57 +49,6 @@ static inline void pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -static inline int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, - int nents, int direction) -{ - return dma_map_sg(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, - int nents, int direction) -{ - dma_unmap_sg(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_cpu(&pdev->dev, dma_handle, size, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_single_for_device(struct pci_dev *pdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - /* No flushing needed to sync cpu writes to the device. */ -} - -static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, - struct scatterlist *sg, - int nents, int direction) -{ - dma_sync_sg_for_cpu(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_sg_for_device(struct pci_dev *pdev, - struct scatterlist *sg, - int nelems, int direction) -{ - /* No flushing needed to sync cpu writes to the device. */ -} - -/* Return whether the given PCI device DMA address mask can - * be supported properly. 
For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); - /* PCI IOMMU mapping bypass support. */ /* PCI 64-bit addressing works for all slots on all controller @@ -140,12 +58,6 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); #define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -static inline int pci_dma_mapping_error(struct pci_dev *pdev, - dma_addr_t dma_addr) -{ - return dma_mapping_error(&pdev->dev, dma_addr); -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index a5d50dac735..b2fa3127f60 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -13,13 +13,17 @@ #include #endif -#include "dma.h" - +/* + * Return whether the given PCI device DMA address mask can be + * supported properly. For example, if your device can only drive the + * low 24-bits during PCI bus mastering, then you would pass + * 0x00ffffff as the mask to this function. + */ int dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) - return pci_dma_supported(to_pci_dev(dev), mask); + return 1; #endif return 0; } @@ -34,148 +38,3 @@ int dma_set_mask(struct device *dev, u64 dma_mask) return -EOPNOTSUPP; } EXPORT_SYMBOL(dma_set_mask); - -static void *dma32_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_alloc_consistent(to_pci_dev(dev), size, dma_handle); -#endif - return sbus_alloc_consistent(dev, size, dma_handle); -} - -static void dma32_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_free_consistent(to_pci_dev(dev), size, - cpu_addr, dma_handle); - return; - } -#endif - sbus_free_consistent(dev, size, cpu_addr, dma_handle); -} - -static dma_addr_t dma32_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - struct dma_attrs *attrs) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_map_page(to_pci_dev(dev), page, offset, - size, (int)direction); -#endif - return sbus_map_page(dev, page, offset, size, (int)direction); -} - -static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_unmap_page(to_pci_dev(dev), dma_address, - size, (int)direction); - return; - } -#endif - sbus_unmap_page(dev, dma_address, size, (int)direction); -} - -static int dma32_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction); -#endif - return sbus_map_sg(dev, sg, nents, direction); -} - -void dma32_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_unmap_sg(to_pci_dev(dev), sg, nents, (int)direction); - return; - } -#endif - sbus_unmap_sg(dev, sg, nents, (int)direction); 
-} - -static void dma32_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, - size, (int)direction); - return; - } -#endif - sbus_dma_sync_single_for_cpu(dev, dma_handle, size, (int) direction); -} - -static void dma32_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, - size, (int)direction); - return; - } -#endif - sbus_dma_sync_single_for_device(dev, dma_handle, size, (int) direction); -} - -static void dma32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, - nelems, (int)direction); - return; - } -#endif - BUG(); -} - -static void dma32_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, - nelems, (int)direction); - return; - } -#endif - BUG(); -} - -static struct dma_map_ops dma32_dma_ops = { - .alloc_coherent = dma32_alloc_coherent, - .free_coherent = dma32_free_coherent, - .map_page = dma32_map_page, - .unmap_page = dma32_unmap_page, - .map_sg = dma32_map_sg, - .unmap_sg = dma32_unmap_sg, - .sync_single_for_cpu = dma32_sync_single_for_cpu, - .sync_single_for_device = dma32_sync_single_for_device, - .sync_sg_for_cpu = dma32_sync_sg_for_cpu, - .sync_sg_for_device = dma32_sync_sg_for_device, -}; - -struct dma_map_ops *dma_ops = &dma32_dma_ops; -EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/kernel/dma.h b/arch/sparc/kernel/dma.h deleted file mode 100644 index 680351ee0d4..00000000000 --- a/arch/sparc/kernel/dma.h +++ /dev/null @@ -1,14 +0,0 @@ -void *sbus_alloc_consistent(struct device *dev, long len, u32 *dma_addrp); -void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba); -dma_addr_t sbus_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t len, int direction); -void sbus_unmap_page(struct device *dev, dma_addr_t ba, - size_t n, int direction); -int sbus_map_sg(struct device *dev, struct scatterlist *sg, - int n, int direction); -void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, - int n, int direction); -void sbus_dma_sync_single_for_cpu(struct device *dev, dma_addr_t ba, - size_t size, int direction); -void sbus_dma_sync_single_for_device(struct device *dev, dma_addr_t ba, - size_t size, int direction); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 74b289cab55..7690cc219ec 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -840,6 +840,8 @@ static struct dma_map_ops sun4u_dma_ops = { struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); +extern int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask); + int dma_supported(struct device *dev, u64 device_mask) { struct iommu *iommu = dev->archdata.iommu; @@ -853,7 +855,7 @@ int dma_supported(struct device *dev, u64 device_mask) #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) - return pci_dma_supported(to_pci_dev(dev), device_mask); + return pci64_dma_supported(to_pci_dev(dev), device_mask); #endif return 0; diff --git 
a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 39ff1e0c518..1eb60438965 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -48,8 +48,6 @@ #include #include -#include "dma.h" - #define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ static struct resource *_sparc_find_resource(struct resource *r, @@ -246,7 +244,8 @@ EXPORT_SYMBOL(sbus_set_sbus64); * Typically devices use them for control blocks. * CPU may access them without any explicit flushing. */ -void *sbus_alloc_consistent(struct device *dev, long len, u32 *dma_addrp) +static void *sbus_alloc_coherent(struct device *dev, size_t len, + dma_addr_t *dma_addrp, gfp_t gfp) { struct of_device *op = to_of_device(dev); unsigned long len_total = (len + PAGE_SIZE-1) & PAGE_MASK; @@ -299,7 +298,8 @@ err_nopages: return NULL; } -void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) +static void sbus_free_coherent(struct device *dev, size_t n, void *p, + dma_addr_t ba) { struct resource *res; struct page *pgv; @@ -317,7 +317,7 @@ void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) n = (n + PAGE_SIZE-1) & PAGE_MASK; if ((res->end-res->start)+1 != n) { - printk("sbus_free_consistent: region 0x%lx asked 0x%lx\n", + printk("sbus_free_consistent: region 0x%lx asked 0x%zx\n", (long)((res->end-res->start)+1), n); return; } @@ -337,8 +337,10 @@ void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) * CPU view of this memory may be inconsistent with * a device view and explicit flushing is necessary. */ -dma_addr_t sbus_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t len, int direction) +static dma_addr_t sbus_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, + enum dma_data_direction dir, + struct dma_attrs *attrs) { void *va = page_address(page) + offset; @@ -353,12 +355,14 @@ dma_addr_t sbus_map_page(struct device *dev, struct page *page, return mmu_get_scsi_one(dev, va, len); } -void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, int direction) +static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_release_scsi_one(dev, ba, n); } -int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, int direction) +static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_get_scsi_sgl(dev, sg, n); @@ -369,19 +373,38 @@ int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, int direction return n; } -void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, int direction) +static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_release_scsi_sgl(dev, sg, n); } -void sbus_dma_sync_single_for_cpu(struct device *dev, dma_addr_t ba, size_t size, int direction) +static void sbus_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int n, enum dma_data_direction dir) { + BUG(); } -void sbus_dma_sync_single_for_device(struct device *dev, dma_addr_t ba, size_t size, int direction) +static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int n, enum dma_data_direction dir) { + BUG(); } +struct dma_map_ops sbus_dma_ops = { + .alloc_coherent = sbus_alloc_coherent, + .free_coherent = sbus_free_coherent, + .map_page = sbus_map_page, + .unmap_page = sbus_unmap_page, + .map_sg = sbus_map_sg, 
+ .unmap_sg = sbus_unmap_sg, + .sync_sg_for_cpu = sbus_sync_sg_for_cpu, + .sync_sg_for_device = sbus_sync_sg_for_device, +}; + +struct dma_map_ops *dma_ops = &sbus_dma_ops; +EXPORT_SYMBOL(dma_ops); + static int __init sparc_register_ioport(void) { register_proc_sparc_ioport(); @@ -398,7 +421,8 @@ arch_initcall(sparc_register_ioport); /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices. */ -void *pci_alloc_consistent(struct pci_dev *pdev, size_t len, dma_addr_t *pba) +static void *pci32_alloc_coherent(struct device *dev, size_t len, + dma_addr_t *pba, gfp_t gfp) { unsigned long len_total = (len + PAGE_SIZE-1) & PAGE_MASK; unsigned long va; @@ -442,7 +466,6 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t len, dma_addr_t *pba) *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ return (void *) res->start; } -EXPORT_SYMBOL(pci_alloc_consistent); /* Free and unmap a consistent DMA buffer. * cpu_addr is what was returned from pci_alloc_consistent, @@ -452,7 +475,8 @@ EXPORT_SYMBOL(pci_alloc_consistent); * References to the memory and mappings associated with cpu_addr/dma_addr * past this call are illegal. */ -void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) +static void pci32_free_coherent(struct device *dev, size_t n, void *p, + dma_addr_t ba) { struct resource *res; unsigned long pgp; @@ -484,60 +508,18 @@ void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) free_pages(pgp, get_order(n)); } -EXPORT_SYMBOL(pci_free_consistent); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single_* is performed. - */ -dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, - int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - /* IIep is write-through, not flushing. */ - return virt_to_phys(ptr); -} -EXPORT_SYMBOL(pci_map_single); - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, - int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { - mmu_inval_dma_area((unsigned long)phys_to_virt(ba), - (size + PAGE_SIZE-1) & PAGE_MASK); - } -} -EXPORT_SYMBOL(pci_unmap_single); /* * Same as pci_map_single, but with pages. */ -dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) +static dma_addr_t pci32_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { - BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ return page_to_phys(page) + offset; } -EXPORT_SYMBOL(pci_map_page); - -void pci_unmap_page(struct pci_dev *hwdev, - dma_addr_t dma_address, size_t size, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - /* mmu_inval_dma_area XXX */ -} -EXPORT_SYMBOL(pci_unmap_page); /* Map a set of buffers described by scatterlist in streaming * mode for DMA. 
This is the scather-gather version of the @@ -554,13 +536,13 @@ EXPORT_SYMBOL(pci_unmap_page); * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, - int direction) +static int pci32_map_sg(struct device *device, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); @@ -569,20 +551,19 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, } return nents; } -EXPORT_SYMBOL(pci_map_sg); /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, - int direction) +static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -591,7 +572,6 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, } } } -EXPORT_SYMBOL(pci_unmap_sg); /* Make physical memory consistent for a single * streaming mode DMA translation before or after a transfer. @@ -603,25 +583,23 @@ EXPORT_SYMBOL(pci_unmap_sg); * must first perform a pci_dma_sync_for_device, and then the * device again owns the buffer. */ -void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) +static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, + size_t size, enum dma_data_direction dir) { - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); } } -EXPORT_SYMBOL(pci_dma_sync_single_for_cpu); -void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) +static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, + size_t size, enum dma_data_direction dir) { - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); } } -EXPORT_SYMBOL(pci_dma_sync_single_for_device); /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. @@ -629,13 +607,13 @@ EXPORT_SYMBOL(pci_dma_sync_single_for_device); * The same as pci_dma_sync_single_* but for a scatter-gather list, * same rules and usage. 
*/ -void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) +static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -644,15 +622,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int } } } -EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu); -void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) +static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -661,7 +638,20 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, } } } -EXPORT_SYMBOL(pci_dma_sync_sg_for_device); + +struct dma_map_ops pci32_dma_ops = { + .alloc_coherent = pci32_alloc_coherent, + .free_coherent = pci32_free_coherent, + .map_page = pci32_map_page, + .map_sg = pci32_map_sg, + .unmap_sg = pci32_unmap_sg, + .sync_single_for_cpu = pci32_sync_single_for_cpu, + .sync_single_for_device = pci32_sync_single_for_device, + .sync_sg_for_cpu = pci32_sync_sg_for_cpu, + .sync_sg_for_device = pci32_sync_sg_for_device, +}; +EXPORT_SYMBOL(pci32_dma_ops); + #endif /* CONFIG_PCI */ #ifdef CONFIG_PROC_FS diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 57859ad2354..c6864866280 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1039,7 +1039,7 @@ static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) pci_dev_put(ali_isa_bridge); } -int pci_dma_supported(struct pci_dev *pdev, u64 device_mask) +int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask) { u64 dma_addr_mask; -- cgit v1.2.3 From 451d7400a34cb679369e337d67f0238ed410f484 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:17 +0900 Subject: sparc: Add CONFIG_DMA_API_DEBUG support All we need to do for CONFIG_DMA_API_DEBUG support is call dma_debug_init() in DMA code common for SPARC32 and SPARC64. Now SPARC32 uses two dma_map_ops structures for pci and sbus so there is not much dma stuff for SPARC32 in kernel/dma.c. kernel/ioport.c also includes dma stuff for SPARC32. So let's put all the dma stuff for SPARC32 in kernel/ioport.c and make kernel/dma.c common for SPARC32 and SPARC64. Signed-off-by: FUJITA Tomonori Tested-by: Robert Reif Acked-by: David S. 
Miller Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/dma-mapping.h | 6 +++++- arch/sparc/kernel/Makefile | 2 +- arch/sparc/kernel/dma.c | 37 +++++------------------------------- arch/sparc/kernel/ioport.c | 27 ++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 5f2df99645c..233cff53a62 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,6 +26,7 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 select HAVE_DMA_ATTRS + select HAVE_DMA_API_DEBUG config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 2677818dc78..5a8c308e2b5 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -32,8 +32,11 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; - return ops->alloc_coherent(dev, size, dma_handle, flag); + cpu_addr = ops->alloc_coherent(dev, size, dma_handle, flag); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; } static inline void dma_free_coherent(struct device *dev, size_t size, @@ -41,6 +44,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); ops->free_coherent(dev, size, cpu_addr, dma_handle); } diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 475ce4696ac..29b88a58066 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_SPARC64_SMP) += cpumap.o obj-$(CONFIG_SPARC32) += devres.o devres-y := ../../../kernel/irq/devres.o -obj-$(CONFIG_SPARC32) += dma.o +obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index b2fa3127f60..e1ba8ee21b9 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -1,40 +1,13 @@ -/* dma.c: PCI and SBUS DMA accessors for 32-bit sparc. - * - * Copyright (C) 2008 David S. Miller - */ - #include #include #include -#include -#include +#include -#ifdef CONFIG_PCI -#include -#endif +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 15) -/* - * Return whether the given PCI device DMA address mask can be - * supported properly. For example, if your device can only drive the - * low 24-bits during PCI bus mastering, then you would pass - * 0x00ffffff as the mask to this function. 
- */ -int dma_supported(struct device *dev, u64 mask) +static int __init dma_init(void) { -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return 1; -#endif + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } -EXPORT_SYMBOL(dma_supported); - -int dma_set_mask(struct device *dev, u64 dma_mask) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_set_dma_mask(to_pci_dev(dev), dma_mask); -#endif - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(dma_set_mask); +fs_initcall(dma_init); diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 1eb60438965..edbea232c61 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -654,6 +654,33 @@ EXPORT_SYMBOL(pci32_dma_ops); #endif /* CONFIG_PCI */ +/* + * Return whether the given PCI device DMA address mask can be + * supported properly. For example, if your device can only drive the + * low 24-bits during PCI bus mastering, then you would pass + * 0x00ffffff as the mask to this function. + */ +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return 1; +#endif + return 0; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return pci_set_dma_mask(to_pci_dev(dev), dma_mask); +#endif + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(dma_set_mask); + + #ifdef CONFIG_PROC_FS static int -- cgit v1.2.3 From c7425314c755d5f94da7c978205c85a7c6201212 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 9 Aug 2009 17:03:52 +0900 Subject: x86: Introduce GDT_ENTRY_INIT(), initialize bad_bios_desc statically Fully initialize bad_bios_desc statically instead of doing some fields statically and some dynamically. Suggested-by: "H. Peter Anvin" Signed-off-by: Akinobu Mita LKML-Reference: <20090809080350.GA4765@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index febb2dab254..39a4462ef8a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, 0, 0); + +/* + * Set up a segment that references the real mode segment 0x40 + * that extends up to the end of page zero (that we have reserved). + * This is for buggy BIOS's that refer to (real mode) segment 0x40 + * even though they are called in protected mode. + */ +static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, + (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -2331,15 +2339,6 @@ static int __init apm_init(void) } pm_flags |= PM_APM; - /* - * Set up a segment that references the real mode segment 0x40 - * that extends up to the end of page zero (that we have reserved). - * This is for buggy BIOS's that refer to (real mode) segment 0x40 - * even though they are called in protected mode. - */ - set_desc_base(&bad_bios_desc, (unsigned long)__va(0x40UL << 4)); - set_desc_limit(&bad_bios_desc, 4095 - (0x40 << 4)); - /* * Set up the long jump entry point to the APM BIOS, which is called * from inline assembly. 
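For reference, GDT_ENTRY_INIT() is defined in desc_defs.h along these lines, packing flags, base and limit into the two 32-bit words of a segment descriptor at build time:

#define GDT_ENTRY_INIT(flags, base, limit) { { { \
		.a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
		.b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
			((limit) & 0xf0000) | ((base) & 0xff000000), \
	} } }

The static initializer is equivalent to the removed runtime setup: __va(0x400UL) is the same base as __va(0x40UL << 4), and PAGE_SIZE - 0x400 - 1 is the same limit as 4095 - (0x40 << 4) (both evaluate to 3071).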
-- cgit v1.2.3 From eeac19a7efa150231e4a6bb110d6f27500bcc8ce Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:13 -0400 Subject: tracing: Map syscall name to number Add a new function to support translating a syscall name to its number at runtime. This allows the syscall event tracer to map syscall names to numbers. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8e9663413b7..afb31d72618 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -500,6 +500,22 @@ struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + void arch_init_ftrace_syscalls(void) { int i; -- cgit v1.2.3 From 066e0378c23f0a3db730893f6a041e4a3922a385 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:23 -0400 Subject: tracing: Call arch_init_ftrace_syscalls at boot Call arch_init_ftrace_syscalls at boot, so we can determine early the set of syscalls for the syscall trace events. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index afb31d72618..0d93d409b8d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -516,31 +516,24 @@ int syscall_name_to_nr(char *name) return -1; } -void arch_init_ftrace_syscalls(void) +static int __init arch_init_ftrace_syscalls(void) { int i; struct syscall_metadata *meta; unsigned long **psys_syscall_table = &sys_call_table; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto end; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * FTRACE_SYSCALL_MAX, GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); - return; + return -ENOMEM; } for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { meta = find_syscall_meta(psys_syscall_table[i]); syscalls_metadata[i] = meta; } - return; - - /* Paranoid: avoid overflow */ -end: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif -- cgit v1.2.3 From a871bd33a6c0bc86fb47cd02ea2650dd43d3d95f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:31 -0400 Subject: tracing: Add syscall tracepoints Add two tracepoints in the syscall entry and exit paths, conditioned on TIF_SYSCALL_FTRACE. These support the syscall trace event code. 
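The tracepoint declarations themselves live in include/trace/syscall.h rather than under arch/, so they do not appear in this diff. Sketched with plain DECLARE_TRACE (the real header may additionally wire up register/unregister callbacks), they would look roughly like:

/* sketch of the declarations backing the DEFINE_TRACE() calls below */
DECLARE_TRACE(syscall_enter,
	TP_PROTO(struct pt_regs *regs, long id),
	TP_ARGS(regs, id));

DECLARE_TRACE(syscall_exit,
	TP_PROTO(struct pt_regs *regs, long ret),
	TP_ARGS(regs, ret));

With these in place, trace_syscall_enter(regs, regs->orig_ax) below fires the entry tracepoint with the syscall number, and trace_syscall_exit(regs, regs->ax) fires the exit tracepoint with the syscall return value.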
Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c1..34dd6f15185 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,6 +37,9 @@ #include +DEFINE_TRACE(syscall_enter); +DEFINE_TRACE(syscall_exit); + #include "tls.h" enum x86_regset { @@ -1498,7 +1501,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) ret = -1L; if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_enter(regs); + trace_syscall_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1524,7 +1527,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_exit(regs); + trace_syscall_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); -- cgit v1.2.3 From 9daa77e2e9a6b8b859660d5e24d0f8cd77c2af39 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:35 -0400 Subject: tracing: Update FTRACE_SYSCALL_MAX Update FTRACE_SYSCALL_MAX to the current number of syscalls. FTRACE_SYSCALL_MAX is a temporary solution to get the number of syscalls supported by the arch until we find a more dynamic way to get this number. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index bd2c6511c88..71136545187 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -30,9 +30,9 @@ /* FIXME: I don't want to stay hardcoded */ #ifdef CONFIG_X86_64 -# define FTRACE_SYSCALL_MAX 296 +# define FTRACE_SYSCALL_MAX 299 #else -# define FTRACE_SYSCALL_MAX 333 +# define FTRACE_SYSCALL_MAX 337 #endif #ifdef CONFIG_FUNCTION_TRACER -- cgit v1.2.3 From 64c12e0444fcc6b75eb49144ba46d43dbdc6bc8f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:53 -0400 Subject: tracing: Add individual syscalls tracepoint id support The syscall tracepoints currently generate only one event id for all syscall events. This patch associates an id with each syscall trace event, so that we can identify each syscall trace event using the 'perf' tool.
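The two setters added below simply record those ids in the per-syscall metadata. For orientation, the record they touch looks roughly like this; enter_id and exit_id are the fields this patch relies on, while the surrounding layout is an assumption sketched from the other hunks in this series:

struct syscall_metadata {
	const char	*name;		/* looked up by syscall_name_to_nr() */
	int		nb_args;
	const char	**types;	/* parameter type strings */
	const char	**args;		/* parameter name strings */
	int		enter_id;	/* event id of the entry trace event */
	int		exit_id;	/* event id of the exit trace event */
};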
Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0d93d409b8d..3cff1214e17 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -516,6 +516,16 @@ int syscall_name_to_nr(char *name) return -1; } +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static int __init arch_init_ftrace_syscalls(void) { int i; -- cgit v1.2.3 From 0ac676fb50f5f8a22e5e80afc40bf38e31b77c00 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:53:11 -0400 Subject: tracing: Convert x86_64 mmap and uname to use SYSCALL_DEFINE A number of syscalls are not using 'SYSCALL_DEFINE'. I'm not sure why. Convert x86_64 uname and mmap to use SYSCALL_DEFINE. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/sys_x86_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211accf0..45e00eb09c3 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -18,9 +18,9 @@ #include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off) { long error; struct file *file; @@ -226,7 +226,7 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user *name) +SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) { int err; down_read(&uts_sem); -- cgit v1.2.3 From 4ac0478f2afaf8e778b4190d6218459a9dbf2a8f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 30 Jul 2009 02:55:01 +0200 Subject: ALSA: Allow passing platform_data for pxa2xx-ac97 This patch adds support for passing platform data to AC97 bus devices from the PXA2xx-AC97 driver. Signed-off-by: Marek Vasut Signed-off-by: Mark Brown --- arch/arm/mach-pxa/include/mach/audio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-pxa/include/mach/audio.h b/arch/arm/mach-pxa/include/mach/audio.h index 16eb02552d5..a3449e35a6f 100644 --- a/arch/arm/mach-pxa/include/mach/audio.h +++ b/arch/arm/mach-pxa/include/mach/audio.h @@ -3,10 +3,12 @@ #include #include +#include /* * @reset_gpio: AC97 reset gpio (normally gpio113 or gpio95) * a -1 value means no gpio will be used for reset + * @codec_pdata: AC97 codec platform_data * reset_gpio should only be specified for pxa27x CPUs where a silicon * bug prevents correct operation of the reset line.
If not specified, @@ -20,6 +22,7 @@ typedef struct { void (*resume)(void *); void *priv; int reset_gpio; + void *codec_pdata[AC97_BUS_MAX_DEVICES]; } pxa2xx_audio_ops_t; extern void pxa_set_ac97_info(pxa2xx_audio_ops_t *ops); -- cgit v1.2.3 From 48e46b7b311c54525712c28004f0a59f2c931d30 Mon Sep 17 00:00:00 2001 From: Jurij Smakov Date: Sun, 16 Aug 2009 18:21:47 -0700 Subject: sparc64: build compressed image (zImage) by default Besides creating the uncompressed vmlinux image for sparc64, also create a compressed zImage. This is more consistent with other architectures and required to make the 'deb-pkg' target work. Signed-off-by: Jurij Smakov Signed-off-by: Frans Pop Signed-off-by: David S. Miller --- arch/sparc/Makefile | 12 ++++++------ arch/sparc/boot/Makefile | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 2003ded054c..467221dd570 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -38,10 +38,6 @@ CPPFLAGS_vmlinux.lds += -m32 # Actual linking is done with "make image". LDFLAGS_vmlinux = -r -# Default target -all: zImage - - else ##### # sparc64 @@ -91,6 +87,9 @@ endif boot := arch/sparc/boot +# Default target +all: zImage + image zImage tftpboot.img vmlinux.aout: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @@ -109,8 +108,9 @@ define archhelp endef else define archhelp - echo '* vmlinux - Standard sparc64 kernel' - echo ' vmlinux.aout - a.out kernel for sparc64' + echo '* vmlinux - standard sparc64 kernel' + echo '* zImage - stripped and compressed sparc64 kernel ($(boot)/zImage)' + echo ' vmlinux.aout - a.out kernel for sparc64' echo ' tftpboot.img - image prepared for tftp' endef endif diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index 1ff0fd92475..97e3feb9ff1 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -79,6 +79,9 @@ $(obj)/image: vmlinux FORCE $(call if_changed,strip) @echo ' kernel: $@ is ready' +$(obj)/zImage: $(obj)/image + $(call if_changed,gzip) + $(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback_64 System.map $(ROOT_IMG) FORCE $(call if_changed,elftoaout) $(call if_changed,piggy) -- cgit v1.2.3 From 3f389635102e0dd2f85ebfe7fd549942b5bbc33b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 12 Aug 2009 22:49:47 +0000 Subject: SPARC: fix duplicate declaration The only difference between the 32-bit and 64-bit versions is dma64_addr_t; the rest is the same. Also fixed the following 'make includecheck' warning: arch/sparc/include/asm/types.h: asm-generic/int-ll64.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: David S. Miller --- arch/sparc/include/asm/types.h | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/types.h b/arch/sparc/include/asm/types.h index de671d73bae..09c79a9c851 100644 --- a/arch/sparc/include/asm/types.h +++ b/arch/sparc/include/asm/types.h @@ -8,9 +8,8 @@ * need to be careful to avoid a name clashes. */ -#if defined(__sparc__) && defined(__arch64__) +#if defined(__sparc__) -/*** SPARC 64 bit ***/ #include #ifndef __ASSEMBLY__ @@ -26,33 +25,21 @@ typedef unsigned short umode_t; /* Dma addresses come in generic and 64-bit flavours.
*/ typedef u32 dma_addr_t; -typedef u64 dma64_addr_t; -#endif /* __ASSEMBLY__ */ +#if defined(__arch64__) -#endif /* __KERNEL__ */ +/*** SPARC 64 bit ***/ +typedef u64 dma64_addr_t; #else - /*** SPARC 32 bit ***/ -#include - -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; - -#endif /* __ASSEMBLY__ */ - -#ifdef __KERNEL__ - -#ifndef __ASSEMBLY__ - -typedef u32 dma_addr_t; typedef u32 dma64_addr_t; +#endif /* defined(__arch64__) */ + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* defined(__sparc__) && defined(__arch64__) */ +#endif /* defined(__sparc__) */ #endif /* defined(_SPARC_TYPES_H) */ -- cgit v1.2.3 From 8abf9196008a7f06841b10769d1aed3f28c1d314 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 13 Aug 2009 10:05:43 +0000 Subject: sparc64: cheaper asm/uaccess.h inclusion sched.h inclusion is definitely not needed like in 32-bit version, remove it, fixup compilation. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 2 +- arch/sparc/kernel/sysfs.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index a38c0323891..9ea271e19c7 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -7,8 +7,8 @@ #ifdef __KERNEL__ #include -#include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index d28f496f466..ca39c606fe8 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -2,6 +2,7 @@ * * Copyright (C) 2007 David S. Miller */ +#include #include #include #include -- cgit v1.2.3 From b7f42ab2e237f08a5bbcefa17473e80eb05e725c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 17 Aug 2009 11:19:40 -0700 Subject: x86, apic: Move dmar_table_init() out of enable_IR() On an x2apic system, we got: [ 1.818072] ------------[ cut here ]------------ [ 1.820376] WARNING: at kernel/lockdep.c:2461 lockdep_trace_alloc+0xa5/0xe9() [ 1.835282] Hardware name: ASSY, [ 1.839006] Modules linked in: [ 1.841253] Pid: 1, comm: swapper Not tainted 2.6.31-rc5-tip-03926-g39aaa80-dirty #510 [ 1.858056] Call Trace: [ 1.859913] [] ? lockdep_trace_alloc+0xa5/0xe9 [ 1.876270] [] warn_slowpath_common+0x8d/0xd0 [ 1.879132] [] warn_slowpath_null+0x27/0x3d [ 1.896823] [] lockdep_trace_alloc+0xa5/0xe9 [ 1.900659] [] ? lock_release_holdtime+0x2f/0x199 [ 1.917188] [] kmem_cache_alloc_notrace+0x42/0x111 [ 1.922320] [] ? reserve_memtype+0x152/0x518 [ 1.938137] [] ? pat_pagerange_is_ram+0x4a/0x91 [ 1.941730] [] reserve_memtype+0x152/0x518 [ 1.958115] [] __ioremap_caller+0x1dd/0x30f [ 1.975507] [] ? acpi_os_map_memory+0x2a/0x47 [ 1.978987] [] ioremap_nocache+0x2a/0x40 [ 2.031400] [] ? trace_hardirqs_off+0x20/0x36 [ 2.036096] [] acpi_os_map_memory+0x2a/0x47 [ 2.046263] [] acpi_tb_verify_table+0x3d/0x85 [ 2.050349] [] ? _spin_unlock_irqrestore+0x50/0x76 [ 2.067327] [] acpi_get_table_with_size+0x64/0xd9 [ 2.070860] [] ? _spin_unlock_irqrestore+0x50/0x76 [ 2.088000] [] dmar_table_detect+0x33/0x70 [ 2.092047] [] dmar_table_init+0x43/0x428 [ 2.106854] [] enable_IR+0x1c/0x8d [ 2.110256] [] enable_IR_x2apic+0x7c/0x19e [ 2.127139] [] native_smp_prepare_cpus+0x139/0x3b8 [ 2.145175] [] kernel_init+0x71/0x1da [ 2.148913] [] child_rip+0xa/0x20 [ 2.152349] [] ? restore_args+0x0/0x30 [ 2.167931] [] ? kernel_init+0x0/0x1da [ 2.171671] [] ? 
child_rip+0x0/0x20 [ 2.187607] ---[ end trace a7919e7f17c0a725 ]--- Venkatesh Pallipadi said: | Looks like the problem started with this commit | | commit ce69a784504222c3ab6f1b3c357d09ec5772127a | Author: Gleb Natapov | Date: Mon Jul 20 15:24:17 2009 +0300 | | x86/apic: Enable x2APIC without interrupt remapping under KVM | | Before this commit, dmar_table_init() was getting called | with interrupts enabled and after this commit, it is getting | called with interrupts disabled. So move dmar_table_init() out of that function. Analyzed-by: Venkatesh Pallipadi Signed-off-by: Yinghai Lu Cc: Peter Zijlstra Cc: Gleb Natapov Cc: Suresh Siddha Cc: "Pallipadi, Venkatesh" LKML-Reference: <4A899F3C.2050104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index de039fcdd05..3fc3a6c428d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1365,14 +1365,6 @@ void enable_x2apic(void) int __init enable_IR(void) { #ifdef CONFIG_INTR_REMAP - int ret; - - ret = dmar_table_init(); - if (ret) { - pr_debug("dmar_table_init() failed with %d:\n", ret); - return 0; - } - if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); return 0; @@ -1400,6 +1392,14 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; + int dmar_table_init_ret = 0; + +#ifdef CONFIG_INTR_REMAP + dmar_table_init_ret = dmar_table_init(); + if (dmar_table_init_ret) + pr_debug("dmar_table_init() failed with %d:\n", + dmar_table_init_ret); +#endif ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { @@ -1417,7 +1417,11 @@ void __init enable_IR_x2apic(void) mask_8259A(); mask_IO_APIC_setup(ioapic_entries); - ret = enable_IR(); + if (dmar_table_init_ret) + ret = 0; + else + ret = enable_IR(); + if (!ret) { /* IR is required if there is APIC ID > 255 even when running * under KVM -- cgit v1.2.3 From 5213a780295895630530aebacdd19217a5379c9a Mon Sep 17 00:00:00 2001 From: Konrad Eisele Date: Mon, 17 Aug 2009 00:13:29 +0000 Subject: sparc,leon: CONFIG_SPARC_LEON option and leon specific files. When CONFIG_SPARC_LEON is undefined, the macro shields the sun-sparc code from LEON-specific code: include/asm/leon.h becomes empty via #ifdef, and leon_kernel.c and leon_mm.c are not compiled. Signed-off-by: Konrad Eisele Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 11 ++ arch/sparc/include/asm/leon.h | 362 +++++++++++++++++++++++++++++++++++++ arch/sparc/include/asm/leon_amba.h | 263 +++++++++++++++++++++++++++ arch/sparc/kernel/leon_kernel.c | 203 +++++++++++++++++++++ arch/sparc/mm/leon_mm.c | 260 ++++++++++++++++++++++++++ 5 files changed, 1099 insertions(+) create mode 100644 arch/sparc/include/asm/leon.h create mode 100644 arch/sparc/include/asm/leon_amba.h create mode 100644 arch/sparc/kernel/leon_kernel.c create mode 100644 arch/sparc/mm/leon_mm.c (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eab..b847f880eec 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -437,6 +437,17 @@ config SERIAL_CONSOLE If unsure, say N. +config SPARC_LEON + bool "Sparc Leon processor family" + depends on SPARC32 + ---help--- + Say Y here if you are running on a SPARC-LEON processor.
+ The LEON processor is a synthesizable VHDL model of the + SPARC-v8 standard. LEON is part of the GRLIB collection of + IP cores that are distributed under GPL. GRLIB can be downloaded + from www.gaisler.com. You can download a sparc-linux cross-compilation + toolchain at www.gaisler.com. + endmenu menu "Bus options (PCI etc.)" diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h new file mode 100644 index 00000000000..28a42b73f64 --- /dev/null +++ b/arch/sparc/include/asm/leon.h @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de,konrad@gaisler.com) Gaisler Research + * Copyright (C) 2004 Stefan Holst (mail@s-holst.de) Uni-Stuttgart + * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB + * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB + */ + +#ifndef LEON_H_INCLUDE +#define LEON_H_INCLUDE + +#ifdef CONFIG_SPARC_LEON + +#define ASI_LEON_NOCACHE 0x01 + +#define ASI_LEON_DCACHE_MISS 0x1 + +#define ASI_LEON_CACHEREGS 0x02 +#define ASI_LEON_IFLUSH 0x10 +#define ASI_LEON_DFLUSH 0x11 + +#define ASI_LEON_MMUFLUSH 0x18 +#define ASI_LEON_MMUREGS 0x19 +#define ASI_LEON_BYPASS 0x1c +#define ASI_LEON_FLUSH_PAGE 0x10 + +/* mmu register access, ASI_LEON_MMUREGS */ +#define LEON_CNR_CTRL 0x000 +#define LEON_CNR_CTXP 0x100 +#define LEON_CNR_CTX 0x200 +#define LEON_CNR_F 0x300 +#define LEON_CNR_FADDR 0x400 + +#define LEON_CNR_CTX_NCTX 256 /*number of MMU ctx */ + +#define LEON_CNR_CTRL_TLBDIS 0x80000000 + +#define LEON_MMUTLB_ENT_MAX 64 + +/* + * diagnostic access from mmutlb.vhd: + * 0: pte address + * 4: pte + * 8: additional flags + */ +#define LEON_DIAGF_LVL 0x3 +#define LEON_DIAGF_WR 0x8 +#define LEON_DIAGF_WR_SHIFT 3 +#define LEON_DIAGF_HIT 0x10 +#define LEON_DIAGF_HIT_SHIFT 4 +#define LEON_DIAGF_CTX 0x1fe0 +#define LEON_DIAGF_CTX_SHIFT 5 +#define LEON_DIAGF_VALID 0x2000 +#define LEON_DIAGF_VALID_SHIFT 13 + +/* + * Interrupt Sources + * + * The interrupt source numbers directly map to the trap type and to + * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask, + * and the Interrupt Pending Registers. 
+ */ +#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1 +#define LEON_INTERRUPT_UART_1_RX_TX 2 +#define LEON_INTERRUPT_UART_0_RX_TX 3 +#define LEON_INTERRUPT_EXTERNAL_0 4 +#define LEON_INTERRUPT_EXTERNAL_1 5 +#define LEON_INTERRUPT_EXTERNAL_2 6 +#define LEON_INTERRUPT_EXTERNAL_3 7 +#define LEON_INTERRUPT_TIMER1 8 +#define LEON_INTERRUPT_TIMER2 9 +#define LEON_INTERRUPT_EMPTY1 10 +#define LEON_INTERRUPT_EMPTY2 11 +#define LEON_INTERRUPT_OPEN_ETH 12 +#define LEON_INTERRUPT_EMPTY4 13 +#define LEON_INTERRUPT_EMPTY5 14 +#define LEON_INTERRUPT_EMPTY6 15 + +/* irq masks */ +#define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */ +#define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */ +#define LEON_IRQPRIO_R 0xfffe0000 /* bit 31-17 of lregs.irqmask */ + +/* leon uart register definitions */ +#define LEON_OFF_UDATA 0x0 +#define LEON_OFF_USTAT 0x4 +#define LEON_OFF_UCTRL 0x8 +#define LEON_OFF_USCAL 0xc + +#define LEON_UCTRL_RE 0x01 +#define LEON_UCTRL_TE 0x02 +#define LEON_UCTRL_RI 0x04 +#define LEON_UCTRL_TI 0x08 +#define LEON_UCTRL_PS 0x10 +#define LEON_UCTRL_PE 0x20 +#define LEON_UCTRL_FL 0x40 +#define LEON_UCTRL_LB 0x80 + +#define LEON_USTAT_DR 0x01 +#define LEON_USTAT_TS 0x02 +#define LEON_USTAT_TH 0x04 +#define LEON_USTAT_BR 0x08 +#define LEON_USTAT_OV 0x10 +#define LEON_USTAT_PE 0x20 +#define LEON_USTAT_FE 0x40 + +#define LEON_MCFG2_SRAMDIS 0x00002000 +#define LEON_MCFG2_SDRAMEN 0x00004000 +#define LEON_MCFG2_SRAMBANKSZ 0x00001e00 /* [12-9] */ +#define LEON_MCFG2_SRAMBANKSZ_SHIFT 9 +#define LEON_MCFG2_SDRAMBANKSZ 0x03800000 /* [25-23] */ +#define LEON_MCFG2_SDRAMBANKSZ_SHIFT 23 + +#define LEON_TCNT0_MASK 0x7fffff + +#define LEON_USTAT_ERROR (LEON_USTAT_OV | LEON_USTAT_PE | LEON_USTAT_FE) +/* no break yet */ + +#define ASI_LEON3_SYSCTRL 0x02 +#define ASI_LEON3_SYSCTRL_ICFG 0x08 +#define ASI_LEON3_SYSCTRL_DCFG 0x0c +#define ASI_LEON3_SYSCTRL_CFG_SNOOPING (1 << 27) +#define ASI_LEON3_SYSCTRL_CFG_SSIZE(c) (1 << ((c >> 20) & 0xf)) + +#ifndef __ASSEMBLY__ + +/* do a virtual address read without cache */ +static inline unsigned long leon_readnobuffer_reg(unsigned long paddr) +{ + unsigned long retval; + __asm__ __volatile__("lda [%1] %2, %0\n\t" : + "=r"(retval) : "r"(paddr), "i"(ASI_LEON_NOCACHE)); + return retval; +} + +/* do a physical address bypass write, i.e. for 0x80000000 */ +static inline void leon_store_reg(unsigned long paddr, unsigned long value) +{ + __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r"(value), "r"(paddr), + "i"(ASI_LEON_BYPASS) : "memory"); +} + +/* do a physical address bypass load, i.e. 
for 0x80000000 */ +static inline unsigned long leon_load_reg(unsigned long paddr) +{ + unsigned long retval; + __asm__ __volatile__("lda [%1] %2, %0\n\t" : + "=r"(retval) : "r"(paddr), "i"(ASI_LEON_BYPASS)); + return retval; +} + +extern inline void leon_srmmu_disabletlb(void) +{ + unsigned int retval; + __asm__ __volatile__("lda [%%g0] %2, %0\n\t" : "=r"(retval) : "r"(0), + "i"(ASI_LEON_MMUREGS)); + retval |= LEON_CNR_CTRL_TLBDIS; + __asm__ __volatile__("sta %0, [%%g0] %2\n\t" : : "r"(retval), "r"(0), + "i"(ASI_LEON_MMUREGS) : "memory"); +} + +extern inline void leon_srmmu_enabletlb(void) +{ + unsigned int retval; + __asm__ __volatile__("lda [%%g0] %2, %0\n\t" : "=r"(retval) : "r"(0), + "i"(ASI_LEON_MMUREGS)); + retval = retval & ~LEON_CNR_CTRL_TLBDIS; + __asm__ __volatile__("sta %0, [%%g0] %2\n\t" : : "r"(retval), "r"(0), + "i"(ASI_LEON_MMUREGS) : "memory"); +} + +/* macro access for leon_load_reg() and leon_store_reg() */ +#define LEON3_BYPASS_LOAD_PA(x) (leon_load_reg((unsigned long)(x))) +#define LEON3_BYPASS_STORE_PA(x, v) (leon_store_reg((unsigned long)(x), (unsigned long)(v))) +#define LEON3_BYPASS_ANDIN_PA(x, v) LEON3_BYPASS_STORE_PA(x, LEON3_BYPASS_LOAD_PA(x) & v) +#define LEON3_BYPASS_ORIN_PA(x, v) LEON3_BYPASS_STORE_PA(x, LEON3_BYPASS_LOAD_PA(x) | v) +#define LEON_BYPASS_LOAD_PA(x) leon_load_reg((unsigned long)(x)) +#define LEON_BYPASS_STORE_PA(x, v) leon_store_reg((unsigned long)(x), (unsigned long)(v)) +#define LEON_REGLOAD_PA(x) leon_load_reg((unsigned long)(x)+LEON_PREGS) +#define LEON_REGSTORE_PA(x, v) leon_store_reg((unsigned long)(x)+LEON_PREGS, (unsigned long)(v)) +#define LEON_REGSTORE_OR_PA(x, v) LEON_REGSTORE_PA(x, LEON_REGLOAD_PA(x) | (unsigned long)(v)) +#define LEON_REGSTORE_AND_PA(x, v) LEON_REGSTORE_PA(x, LEON_REGLOAD_PA(x) & (unsigned long)(v)) + +/* macro access for leon_readnobuffer_reg() */ +#define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x)) + +extern void sparc_leon_eirq_register(int eirq); +extern void leon_init(void); +extern void leon_switch_mm(void); +extern void leon_init_IRQ(void); + +extern unsigned long last_valid_pfn; + +extern inline unsigned long sparc_leon3_get_dcachecfg(void) +{ + unsigned int retval; + __asm__ __volatile__("lda [%1] %2, %0\n\t" : + "=r"(retval) : + "r"(ASI_LEON3_SYSCTRL_DCFG), + "i"(ASI_LEON3_SYSCTRL)); + return retval; +} + +/* enable snooping */ +extern inline void sparc_leon3_enable_snooping(void) +{ + __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t" + "set 0x800000, %%l2\n\t" + "or %%l2, %%l1, %%l2\n\t" + "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2"); +}; + +extern inline void sparc_leon3_disable_cache(void) +{ + __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t" + "set 0x00000f, %%l2\n\t" + "andn %%l2, %%l1, %%l2\n\t" + "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2"); +}; + +#endif /*!__ASSEMBLY__*/ + +#ifdef CONFIG_SMP +# define LEON3_IRQ_RESCHEDULE 13 +# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq) +# define LEON3_IRQ_CROSS_CALL 15 +#endif + +#if defined(PAGE_SIZE_LEON_8K) +#define LEON_PAGE_SIZE_LEON 1 +#elif defined(PAGE_SIZE_LEON_16K) +#define LEON_PAGE_SIZE_LEON 2) +#else +#define LEON_PAGE_SIZE_LEON 0 +#endif + +#if LEON_PAGE_SIZE_LEON == 0 +/* [ 8, 6, 6 ] + 12 */ +#define LEON_PGD_SH 24 +#define LEON_PGD_M 0xff +#define LEON_PMD_SH 18 +#define LEON_PMD_SH_V (LEON_PGD_SH-2) +#define LEON_PMD_M 0x3f +#define LEON_PTE_SH 12 +#define LEON_PTE_M 0x3f +#elif LEON_PAGE_SIZE_LEON == 1 +/* [ 7, 6, 6 ] + 13 */ +#define LEON_PGD_SH 25 +#define LEON_PGD_M 0x7f +#define LEON_PMD_SH 19 +#define 
LEON_PMD_SH_V (LEON_PGD_SH-1) +#define LEON_PMD_M 0x3f +#define LEON_PTE_SH 13 +#define LEON_PTE_M 0x3f +#elif LEON_PAGE_SIZE_LEON == 2 +/* [ 6, 6, 6 ] + 14 */ +#define LEON_PGD_SH 26 +#define LEON_PGD_M 0x3f +#define LEON_PMD_SH 20 +#define LEON_PMD_SH_V (LEON_PGD_SH-0) +#define LEON_PMD_M 0x3f +#define LEON_PTE_SH 14 +#define LEON_PTE_M 0x3f +#elif LEON_PAGE_SIZE_LEON == 3 +/* [ 4, 7, 6 ] + 15 */ +#define LEON_PGD_SH 28 +#define LEON_PGD_M 0x0f +#define LEON_PMD_SH 21 +#define LEON_PMD_SH_V (LEON_PGD_SH-0) +#define LEON_PMD_M 0x7f +#define LEON_PTE_SH 15 +#define LEON_PTE_M 0x3f +#else +#error cannot determine LEON_PAGE_SIZE_LEON +#endif + +#define PAGE_MIN_SHIFT (12) +#define PAGE_MIN_SIZE (1UL << PAGE_MIN_SHIFT) + +#define LEON3_XCCR_SETS_MASK 0x07000000UL +#define LEON3_XCCR_SSIZE_MASK 0x00f00000UL + +#define LEON2_CCR_DSETS_MASK 0x03000000UL +#define LEON2_CFG_SSIZE_MASK 0x00007000UL + +#ifndef __ASSEMBLY__ +extern unsigned long srmmu_swprobe(unsigned long vaddr, unsigned long *paddr); +extern void leon_flush_icache_all(void); +extern void leon_flush_dcache_all(void); +extern void leon_flush_cache_all(void); +extern void leon_flush_tlb_all(void); +extern int leon_flush_during_switch; +extern int leon_flush_needed(void); + +struct vm_area_struct; +extern void leon_flush_icache_all(void); +extern void leon_flush_dcache_all(void); +extern void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page); +extern void leon_flush_cache_all(void); +extern void leon_flush_tlb_all(void); +extern int leon_flush_during_switch; +extern int leon_flush_needed(void); +extern void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page); + +/* struct that hold LEON3 cache configuration registers */ +struct leon3_cacheregs { + unsigned long ccr; /* 0x00 - Cache Control Register */ + unsigned long iccr; /* 0x08 - Instruction Cache Configuration Register */ + unsigned long dccr; /* 0x0c - Data Cache Configuration Register */ +}; + +/* struct that hold LEON2 cache configuration register + * & configuration register + */ +struct leon2_cacheregs { + unsigned long ccr, cfg; +}; + +#ifdef __KERNEL__ + +#include + +struct device_node; +extern int sparc_leon_eirq_get(int eirq, int cpu); +extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id); +extern void sparc_leon_eirq_register(int eirq); +extern void leon_clear_clock_irq(void); +extern void leon_load_profile_irq(int cpu, unsigned int limit); +extern void leon_init_timers(irq_handler_t counter_fn); +extern void leon_clear_clock_irq(void); +extern void leon_load_profile_irq(int cpu, unsigned int limit); +extern void leon_trans_init(struct device_node *dp); +extern void leon_node_init(struct device_node *dp, struct device_node ***nextp); +extern void leon_init_IRQ(void); +extern void leon_init(void); +extern unsigned long srmmu_swprobe(unsigned long vaddr, unsigned long *paddr); +extern void init_leon(void); +extern void poke_leonsparc(void); +extern void leon3_getCacheRegs(struct leon3_cacheregs *regs); +extern int leon_flush_needed(void); +extern void leon_switch_mm(void); +extern int srmmu_swprobe_trace; + +#endif /* __KERNEL__ */ + +#endif /* __ASSEMBLY__ */ + +/* macros used in leon_mm.c */ +#define PFN(x) ((x) >> PAGE_SHIFT) +#define _pfn_valid(pfn) ((pfn < last_valid_pfn) && (pfn >= PFN(phys_base))) +#define _SRMMU_PTE_PMASK_LEON 0xffffffff + +#else /* defined(CONFIG_SPARC_LEON) */ + +/* nop definitions for !LEON case */ +#define leon_init() do {} while (0) +#define leon_switch_mm() do {} while (0) +#define 
leon_init_IRQ() do {} while (0) +#define init_leon() do {} while (0) + +#endif /* !defined(CONFIG_SPARC_LEON) */ + +#endif diff --git a/arch/sparc/include/asm/leon_amba.h b/arch/sparc/include/asm/leon_amba.h new file mode 100644 index 00000000000..618e8882179 --- /dev/null +++ b/arch/sparc/include/asm/leon_amba.h @@ -0,0 +1,263 @@ +/* +*Copyright (C) 2004 Konrad Eisele (eiselekd@web.de,konrad@gaisler.com), Gaisler Research +*Copyright (C) 2004 Stefan Holst (mail@s-holst.de), Uni-Stuttgart +*Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com),Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB +*/ + +#ifndef LEON_AMBA_H_INCLUDE +#define LEON_AMBA_H_INCLUDE + +#ifndef __ASSEMBLY__ + +struct amba_prom_registers { + unsigned int phys_addr; /* The physical address of this register */ + unsigned int reg_size; /* How many bytes does this register take up? */ +}; + +#endif + +/* + * The following defines the bits in the LEON UART Status Registers. + */ + +#define LEON_REG_UART_STATUS_DR 0x00000001 /* Data Ready */ +#define LEON_REG_UART_STATUS_TSE 0x00000002 /* TX Send Register Empty */ +#define LEON_REG_UART_STATUS_THE 0x00000004 /* TX Hold Register Empty */ +#define LEON_REG_UART_STATUS_BR 0x00000008 /* Break Error */ +#define LEON_REG_UART_STATUS_OE 0x00000010 /* RX Overrun Error */ +#define LEON_REG_UART_STATUS_PE 0x00000020 /* RX Parity Error */ +#define LEON_REG_UART_STATUS_FE 0x00000040 /* RX Framing Error */ +#define LEON_REG_UART_STATUS_ERR 0x00000078 /* Error Mask */ + +/* + * The following defines the bits in the LEON UART Ctrl Registers. + */ + +#define LEON_REG_UART_CTRL_RE 0x00000001 /* Receiver enable */ +#define LEON_REG_UART_CTRL_TE 0x00000002 /* Transmitter enable */ +#define LEON_REG_UART_CTRL_RI 0x00000004 /* Receiver interrupt enable */ +#define LEON_REG_UART_CTRL_TI 0x00000008 /* Transmitter irq */ +#define LEON_REG_UART_CTRL_PS 0x00000010 /* Parity select */ +#define LEON_REG_UART_CTRL_PE 0x00000020 /* Parity enable */ +#define LEON_REG_UART_CTRL_FL 0x00000040 /* Flow control enable */ +#define LEON_REG_UART_CTRL_LB 0x00000080 /* Loop Back enable */ + +#define LEON3_GPTIMER_EN 1 +#define LEON3_GPTIMER_RL 2 +#define LEON3_GPTIMER_LD 4 +#define LEON3_GPTIMER_IRQEN 8 +#define LEON3_GPTIMER_SEPIRQ 8 + +#define LEON23_REG_TIMER_CONTROL_EN 0x00000001 /* 1 = enable counting */ +/* 0 = hold scalar and counter */ +#define LEON23_REG_TIMER_CONTROL_RL 0x00000002 /* 1 = reload at 0 */ + /* 0 = stop at 0 */ +#define LEON23_REG_TIMER_CONTROL_LD 0x00000004 /* 1 = load counter */ + /* 0 = no function */ +#define LEON23_REG_TIMER_CONTROL_IQ 0x00000008 /* 1 = irq enable */ + /* 0 = no function */ + +/* + * The following defines the bits in the LEON PS/2 Status Registers. + */ + +#define LEON_REG_PS2_STATUS_DR 0x00000001 /* Data Ready */ +#define LEON_REG_PS2_STATUS_PE 0x00000002 /* Parity error */ +#define LEON_REG_PS2_STATUS_FE 0x00000004 /* Framing error */ +#define LEON_REG_PS2_STATUS_KI 0x00000008 /* Keyboard inhibit */ +#define LEON_REG_PS2_STATUS_RF 0x00000010 /* RX buffer full */ +#define LEON_REG_PS2_STATUS_TF 0x00000020 /* TX buffer full */ + +/* + * The following defines the bits in the LEON PS/2 Ctrl Registers. 
+ */ + +#define LEON_REG_PS2_CTRL_RE 0x00000001 /* Receiver enable */ +#define LEON_REG_PS2_CTRL_TE 0x00000002 /* Transmitter enable */ +#define LEON_REG_PS2_CTRL_RI 0x00000004 /* Keyboard receive irq */ +#define LEON_REG_PS2_CTRL_TI 0x00000008 /* Keyboard transmit irq */ + +#define LEON3_IRQMPSTATUS_CPUNR 28 +#define LEON3_IRQMPSTATUS_BROADCAST 27 + +#define GPTIMER_CONFIG_IRQNT(a) (((a) >> 3) & 0x1f) +#define GPTIMER_CONFIG_ISSEP(a) ((a) & (1 << 8)) +#define GPTIMER_CONFIG_NTIMERS(a) ((a) & (0x7)) +#define LEON3_GPTIMER_CTRL_PENDING 0x10 +#define LEON3_GPTIMER_CONFIG_NRTIMERS(c) ((c)->config & 0x7) +#define LEON3_GPTIMER_CTRL_ISPENDING(r) (((r)&LEON3_GPTIMER_CTRL_PENDING) ? 1 : 0) + +#ifdef CONFIG_SPARC_LEON + +#ifndef __ASSEMBLY__ + +struct leon3_irqctrl_regs_map { + u32 ilevel; + u32 ipend; + u32 iforce; + u32 iclear; + u32 mpstatus; + u32 mpbroadcast; + u32 notused02; + u32 notused03; + u32 notused10; + u32 notused11; + u32 notused12; + u32 notused13; + u32 notused20; + u32 notused21; + u32 notused22; + u32 notused23; + u32 mask[16]; + u32 force[16]; + /* Extended IRQ registers */ + u32 intid[16]; /* 0xc0 */ +}; + +struct leon3_apbuart_regs_map { + u32 data; + u32 status; + u32 ctrl; + u32 scaler; +}; + +struct leon3_gptimerelem_regs_map { + u32 val; + u32 rld; + u32 ctrl; + u32 unused; +}; + +struct leon3_gptimer_regs_map { + u32 scalar; + u32 scalar_reload; + u32 config; + u32 unused; + struct leon3_gptimerelem_regs_map e[8]; +}; + +/* + * Types and structure used for AMBA Plug & Play bus scanning + */ + +#define AMBA_MAXAPB_DEVS 64 +#define AMBA_MAXAPB_DEVS_PERBUS 16 + +struct amba_device_table { + int devnr; /* number of devices on AHB or APB bus */ + unsigned int *addr[16]; /* addresses to the devices configuration tables */ + unsigned int allocbits[1]; /* 0=unallocated, 1=allocated driver */ +}; + +struct amba_apbslv_device_table { + int devnr; /* number of devices on AHB or APB bus */ + unsigned int *addr[AMBA_MAXAPB_DEVS]; /* addresses to the devices configuration tables */ + unsigned int apbmst[AMBA_MAXAPB_DEVS]; /* apb master if a entry is a apb slave */ + unsigned int apbmstidx[AMBA_MAXAPB_DEVS]; /* apb master idx if a entry is a apb slave */ + unsigned int allocbits[4]; /* 0=unallocated, 1=allocated driver */ +}; + +struct amba_confarea_type { + struct amba_confarea_type *next;/* next bus in chain */ + struct amba_device_table ahbmst; + struct amba_device_table ahbslv; + struct amba_apbslv_device_table apbslv; + unsigned int apbmst; +}; + +/* collect apb slaves */ +struct amba_apb_device { + unsigned int start, irq, bus_id; + struct amba_confarea_type *bus; +}; + +/* collect ahb slaves */ +struct amba_ahb_device { + unsigned int start[4], irq, bus_id; + struct amba_confarea_type *bus; +}; + +struct device_node; +void _amba_init(struct device_node *dp, struct device_node ***nextp); + +extern struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; +extern struct leon3_gptimer_regs_map *leon3_gptimer_regs; +extern struct amba_apb_device leon_percpu_timer_dev[16]; +extern int leondebug_irq_disable; +extern int leon_debug_irqout; +extern unsigned long leon3_gptimer_irq; +extern unsigned int sparc_leon_eirq; + +#endif /* __ASSEMBLY__ */ + +#define LEON3_IO_AREA 0xfff00000 +#define LEON3_CONF_AREA 0xff000 +#define LEON3_AHB_SLAVE_CONF_AREA (1 << 11) + +#define LEON3_AHB_CONF_WORDS 8 +#define LEON3_APB_CONF_WORDS 2 +#define LEON3_AHB_MASTERS 16 +#define LEON3_AHB_SLAVES 16 +#define LEON3_APB_SLAVES 16 +#define LEON3_APBUARTS 8 + +/* Vendor codes */ +#define VENDOR_GAISLER 1 +#define 
VENDOR_PENDER 2 +#define VENDOR_ESA 4 +#define VENDOR_OPENCORES 8 + +/* Gaisler Research device id's */ +#define GAISLER_LEON3 0x003 +#define GAISLER_LEON3DSU 0x004 +#define GAISLER_ETHAHB 0x005 +#define GAISLER_APBMST 0x006 +#define GAISLER_AHBUART 0x007 +#define GAISLER_SRCTRL 0x008 +#define GAISLER_SDCTRL 0x009 +#define GAISLER_APBUART 0x00C +#define GAISLER_IRQMP 0x00D +#define GAISLER_AHBRAM 0x00E +#define GAISLER_GPTIMER 0x011 +#define GAISLER_PCITRG 0x012 +#define GAISLER_PCISBRG 0x013 +#define GAISLER_PCIFBRG 0x014 +#define GAISLER_PCITRACE 0x015 +#define GAISLER_PCIDMA 0x016 +#define GAISLER_AHBTRACE 0x017 +#define GAISLER_ETHDSU 0x018 +#define GAISLER_PIOPORT 0x01A +#define GAISLER_GRGPIO 0x01A +#define GAISLER_AHBJTAG 0x01c +#define GAISLER_ETHMAC 0x01D +#define GAISLER_AHB2AHB 0x020 +#define GAISLER_USBDC 0x021 +#define GAISLER_ATACTRL 0x024 +#define GAISLER_DDRSPA 0x025 +#define GAISLER_USBEHC 0x026 +#define GAISLER_USBUHC 0x027 +#define GAISLER_I2CMST 0x028 +#define GAISLER_SPICTRL 0x02D +#define GAISLER_DDR2SPA 0x02E +#define GAISLER_SPIMCTRL 0x045 +#define GAISLER_LEON4 0x048 +#define GAISLER_LEON4DSU 0x049 +#define GAISLER_AHBSTAT 0x052 +#define GAISLER_FTMCTRL 0x054 +#define GAISLER_KBD 0x060 +#define GAISLER_VGA 0x061 +#define GAISLER_SVGA 0x063 +#define GAISLER_GRSYSMON 0x066 +#define GAISLER_GRACECTRL 0x067 + +#define GAISLER_L2TIME 0xffd /* internal device: leon2 timer */ +#define GAISLER_L2C 0xffe /* internal device: leon2compat */ +#define GAISLER_PLUGPLAY 0xfff /* internal device: plug & play configarea */ + +#define amba_vendor(x) (((x) >> 24) & 0xff) + +#define amba_device(x) (((x) >> 12) & 0xfff) + +#endif /* !defined(CONFIG_SPARC_LEON) */ + +#endif diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c new file mode 100644 index 00000000000..54d8a5bd482 --- /dev/null +++ b/arch/sparc/kernel/leon_kernel.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB + * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "prom.h" +#include "irq.h" + +struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address, initialized by amba_init() */ +struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address, initialized by amba_init() */ +struct amba_apb_device leon_percpu_timer_dev[16]; + +int leondebug_irq_disable; +int leon_debug_irqout; +static int dummy_master_l10_counter; + +unsigned long leon3_gptimer_irq; /* interrupt controller irq number, initialized by amba_init() */ +unsigned int sparc_leon_eirq; +#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0])) + +/* Return the IRQ of the pending IRQ on the extended IRQ controller */ +int sparc_leon_eirq_get(int eirq, int cpu) +{ + return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f; +} + +irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id) +{ + printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n"); + return IRQ_HANDLED; +} + +/* The extended IRQ controller has been found, this function registers it */ +void sparc_leon_eirq_register(int eirq) +{ + int irq; + + /* Register a "BAD" handler for this interrupt, it should never happen */ + irq = request_irq(eirq, sparc_leon_eirq_isr, + (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL); + + if (irq) { + printk(KERN_ERR + "sparc_leon_eirq_register: unable 
to attach IRQ%d\n", + eirq); + } else { + sparc_leon_eirq = eirq; + } + +} + +static inline unsigned long get_irqmask(unsigned int irq) +{ + unsigned long mask; + + if (!irq || ((irq > 0xf) && !sparc_leon_eirq) + || ((irq > 0x1f) && sparc_leon_eirq)) { + printk(KERN_ERR + "leon_get_irqmask: false irq number: %d\n", irq); + mask = 0; + } else { + mask = LEON_HARD_INT(irq); + } + return mask; +} + +static void leon_enable_irq(unsigned int irq_nr) +{ + unsigned long mask, flags; + mask = get_irqmask(irq_nr); + local_irq_save(flags); + LEON3_BYPASS_STORE_PA(LEON_IMASK, + (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask))); + local_irq_restore(flags); +} + +static void leon_disable_irq(unsigned int irq_nr) +{ + unsigned long mask, flags; + mask = get_irqmask(irq_nr); + local_irq_save(flags); + LEON3_BYPASS_STORE_PA(LEON_IMASK, + (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask))); + local_irq_restore(flags); + +} + +void __init leon_init_timers(irq_handler_t counter_fn) +{ + int irq; + + leondebug_irq_disable = 0; + leon_debug_irqout = 0; + master_l10_counter = (unsigned int *)&dummy_master_l10_counter; + dummy_master_l10_counter = 0; + + if (leon3_gptimer_regs && leon3_irqctrl_regs) { + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].val, 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].rld, + (((1000000 / 100) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].ctrl, 0); + + } else { + printk(KERN_ERR "No Timer/irqctrl found\n"); + BUG(); + } + + irq = request_irq(leon3_gptimer_irq, + counter_fn, + (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); + + if (irq) { + printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n", + LEON_INTERRUPT_TIMER1); + prom_halt(); + } + + if (leon3_gptimer_regs) { + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN); + } +} + +void leon_clear_clock_irq(void) +{ +} + +void leon_load_profile_irq(int cpu, unsigned int limit) +{ + BUG(); +} + + + + +void __init leon_trans_init(struct device_node *dp) +{ + if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "") == 0) { + struct property *p; + p = of_find_property(dp, "mid", (void *)0); + if (p) { + int mid; + dp->name = prom_early_alloc(5 + 1); + memcpy(&mid, p->value, p->length); + sprintf((char *)dp->name, "cpu%.2d", mid); + } + } +} + +void __initdata (*prom_amba_init)(struct device_node *dp, struct device_node ***nextp) = 0; + +void __init leon_node_init(struct device_node *dp, struct device_node ***nextp) +{ + if (prom_amba_init && + strcmp(dp->type, "ambapp") == 0 && + strcmp(dp->name, "ambapp0") == 0) { + prom_amba_init(dp, nextp); + } +} + +void __init leon_init_IRQ(void) +{ + sparc_init_timers = leon_init_timers; + + BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM); + + BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq, + BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(load_profile_irq, leon_load_profile_irq, + BTFIXUPCALL_NOP); + +#ifdef CONFIG_SMP + BTFIXUPSET_CALL(set_cpu_int, leon_set_cpu_int, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(clear_cpu_int, leon_clear_ipi, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(set_irq_udt, leon_set_udt, BTFIXUPCALL_NORM); +#endif + +} + +void __init leon_init(void) +{ + prom_build_more = &leon_node_init; +} diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c new file mode 
100644 index 00000000000..c0e01297e64 --- /dev/null +++ b/arch/sparc/mm/leon_mm.c @@ -0,0 +1,260 @@ +/* + * linux/arch/sparc/mm/leon_m.c + * + * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de, konrad@gaisler.com) Gaisler Research + * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB + * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB + * + * do srmmu probe in software + * + */ + +#include +#include +#include +#include +#include + +int leon_flush_during_switch = 1; +int srmmu_swprobe_trace; + +unsigned long srmmu_swprobe(unsigned long vaddr, unsigned long *paddr) +{ + + unsigned int ctxtbl; + unsigned int pgd, pmd, ped; + unsigned int ptr; + unsigned int lvl, pte, paddrbase; + unsigned int ctx; + unsigned int paddr_calc; + + paddrbase = 0; + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: trace on\n"); + + ctxtbl = srmmu_get_ctable_ptr(); + if (!(ctxtbl)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: srmmu_get_ctable_ptr returned 0=>0\n"); + return 0; + } + if (!_pfn_valid(PFN(ctxtbl))) { + if (srmmu_swprobe_trace) + printk(KERN_INFO + "swprobe: !_pfn_valid(%x)=>0\n", + PFN(ctxtbl)); + return 0; + } + + ctx = srmmu_get_context(); + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- ctx (%x) ---\n", ctx); + + pgd = LEON_BYPASS_LOAD_PA(ctxtbl + (ctx * 4)); + + if (((pgd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pgd is entry level 3\n"); + lvl = 3; + pte = pgd; + paddrbase = pgd & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((pgd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pgd is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- pgd (%x) ---\n", pgd); + + ptr = (pgd & SRMMU_PTD_PMASK) << 4; + ptr += ((((vaddr) >> LEON_PGD_SH) & LEON_PGD_M) * 4); + if (!_pfn_valid(PFN(ptr))) + return 0; + + pmd = LEON_BYPASS_LOAD_PA(ptr); + if (((pmd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pmd is entry level 2\n"); + lvl = 2; + pte = pmd; + paddrbase = pmd & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((pmd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pmd is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- pmd (%x) ---\n", pmd); + + ptr = (pmd & SRMMU_PTD_PMASK) << 4; + ptr += (((vaddr >> LEON_PMD_SH) & LEON_PMD_M) * 4); + if (!_pfn_valid(PFN(ptr))) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: !_pfn_valid(%x)=>0\n", + PFN(ptr)); + return 0; + } + + ped = LEON_BYPASS_LOAD_PA(ptr); + + if (((ped & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ped is entry level 1\n"); + lvl = 1; + pte = ped; + paddrbase = ped & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((ped & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ped is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- ped (%x) ---\n", ped); + + ptr = (ped & SRMMU_PTD_PMASK) << 4; + ptr += (((vaddr >> LEON_PTE_SH) & LEON_PTE_M) * 4); + if (!_pfn_valid(PFN(ptr))) + return 0; + + ptr = LEON_BYPASS_LOAD_PA(ptr); + if (((ptr & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ptr is entry level 0\n"); + lvl = 0; + pte = ptr; + paddrbase = ptr & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if 
(srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ptr is invalid => 0\n"); + return 0; + +ready: + switch (lvl) { + case 0: + paddr_calc = + (vaddr & ~(-1 << LEON_PTE_SH)) | ((pte & ~0xff) << 4); + break; + case 1: + paddr_calc = + (vaddr & ~(-1 << LEON_PMD_SH)) | ((pte & ~0xff) << 4); + break; + case 2: + paddr_calc = + (vaddr & ~(-1 << LEON_PGD_SH)) | ((pte & ~0xff) << 4); + break; + default: + case 3: + paddr_calc = vaddr; + break; + } + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: padde %x\n", paddr_calc); + if (paddr) + *paddr = paddr_calc; + return paddrbase; +} + +void leon_flush_icache_all(void) +{ + __asm__ __volatile__(" flush "); /*iflush*/ +} + +void leon_flush_dcache_all(void) +{ + __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : : + "i"(ASI_LEON_DFLUSH) : "memory"); +} + +void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page) +{ + if (vma->vm_flags & VM_EXEC) + leon_flush_icache_all(); + leon_flush_dcache_all(); +} + +void leon_flush_cache_all(void) +{ + __asm__ __volatile__(" flush "); /*iflush*/ + __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : : + "i"(ASI_LEON_DFLUSH) : "memory"); +} + +void leon_flush_tlb_all(void) +{ + leon_flush_cache_all(); + __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400), + "i"(ASI_LEON_MMUFLUSH) : "memory"); +} + +/* get all cache regs */ +void leon3_getCacheRegs(struct leon3_cacheregs *regs) +{ + unsigned long ccr, iccr, dccr; + + if (!regs) + return; + /* Get Cache regs from "Cache ASI" address 0x0, 0x8 and 0xC */ + __asm__ __volatile__("lda [%%g0] %3, %0\n\t" + "mov 0x08, %%g1\n\t" + "lda [%%g1] %3, %1\n\t" + "mov 0x0c, %%g1\n\t" + "lda [%%g1] %3, %2\n\t" + : "=r"(ccr), "=r"(iccr), "=r"(dccr) + /* output */ + : "i"(ASI_LEON_CACHEREGS) /* input */ + : "g1" /* clobber list */ + ); + regs->ccr = ccr; + regs->iccr = iccr; + regs->dccr = dccr; +} + +/* Due to virtual cache we need to check cache configuration if + * it is possible to skip flushing in some cases. + * + * Leon2 and Leon3 differ in their way of telling cache information + * + */ +int leon_flush_needed(void) +{ + int flush_needed = -1; + unsigned int ssize, sets; + char *setStr[4] = + { "direct mapped", "2-way associative", "3-way associative", + "4-way associative" + }; + /* leon 3 */ + struct leon3_cacheregs cregs; + leon3_getCacheRegs(&cregs); + sets = (cregs.dccr & LEON3_XCCR_SETS_MASK) >> 24; + /* (ssize=>realsize) 0=>1k, 1=>2k, 2=>4k, 3=>8k ... */ + ssize = 1 << ((cregs.dccr & LEON3_XCCR_SSIZE_MASK) >> 20); + + printk(KERN_INFO "CACHE: %s cache, set size %dk\n", + sets > 3 ? "unknown" : setStr[sets], ssize); + if ((ssize <= (PAGE_SIZE / 1024)) && (sets == 0)) { + /* Set Size <= Page size ==> + flush on every context switch not needed. */ + flush_needed = 0; + printk(KERN_INFO "CACHE: not flushing on every context switch\n"); + } + return flush_needed; +} + +void leon_switch_mm(void) +{ + flush_tlb_mm((void *)0); + if (leon_flush_during_switch) + leon_flush_cache_all(); +} -- cgit v1.2.3 From 97fb58fa9bb509b49090a1c62ed1b660d518c66b Mon Sep 17 00:00:00 2001 From: Konrad Eisele Date: Mon, 17 Aug 2009 00:13:30 +0000 Subject: sparc,leon: Redefine MMU register access asi if CONFIG_LEON SPARC-LEON has a different ASI for mmu register accesses. Signed-off-by: Konrad Eisele Reviewed-by: Sam Ravnborg Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/asi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h index 74703c5ef98..b2e3db63a64 100644 --- a/arch/sparc/include/asm/asi.h +++ b/arch/sparc/include/asm/asi.h @@ -40,7 +40,11 @@ #define ASI_M_UNA01 0x01 /* Same here... */ #define ASI_M_MXCC 0x02 /* Access to TI VIKING MXCC registers */ #define ASI_M_FLUSH_PROBE 0x03 /* Reference MMU Flush/Probe; rw, ss */ +#ifndef CONFIG_SPARC_LEON #define ASI_M_MMUREGS 0x04 /* MMU Registers; rw, ss */ +#else +#define ASI_M_MMUREGS 0x19 +#endif /* CONFIG_SPARC_LEON */ #define ASI_M_TLBDIAG 0x05 /* MMU TLB only Diagnostics */ #define ASI_M_DIAGS 0x06 /* Reference MMU Diagnostics */ #define ASI_M_IODIAG 0x07 /* MMU I/O TLB only Diagnostics */ -- cgit v1.2.3 From 0fd7ef1fe0e6e70c7851ce65a2eb8a8d3f49147e Mon Sep 17 00:00:00 2001 From: Konrad Eisele Date: Mon, 17 Aug 2009 00:13:31 +0000 Subject: sparc,leon: Introduce the sparc-leon CPU type. Add sparc_leon enum, M_LEON|M_LEON3_SOC machine. Add compilation of leon.c in mm and kernel if CONFIG_SPARC_LEON is defined. Add sparc_leon dependent initialization to switch statements + head.S. Signed-off-by: Konrad Eisele Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/machines.h | 6 +++++- arch/sparc/include/asm/pgtsrmmu.h | 4 ++++ arch/sparc/include/asm/prom.h | 3 +++ arch/sparc/include/asm/system_32.h | 1 + arch/sparc/kernel/Makefile | 2 ++ arch/sparc/kernel/cpu.c | 5 +++++ arch/sparc/kernel/head_32.S | 5 +++++ arch/sparc/kernel/idprom.c | 2 ++ arch/sparc/kernel/irq_32.c | 5 +++++ arch/sparc/kernel/setup_32.c | 5 +++++ arch/sparc/mm/Makefile | 1 + arch/sparc/mm/init_32.c | 4 ++++ arch/sparc/mm/loadmmu.c | 1 + 13 files changed, 43 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/machines.h b/arch/sparc/include/asm/machines.h index c28c2f24879..cd9c099567e 100644 --- a/arch/sparc/include/asm/machines.h +++ b/arch/sparc/include/asm/machines.h @@ -15,7 +15,7 @@ struct Sun_Machine_Models { /* Current number of machines we know about that has an IDPROM * machtype entry including one entry for the 0x80 OBP machines. 
*/ -#define NUM_SUN_MACHINES 15 +#define NUM_SUN_MACHINES 16 /* The machine type in the idprom area looks like this: * @@ -30,6 +30,7 @@ struct Sun_Machine_Models { #define SM_ARCH_MASK 0xf0 #define SM_SUN4 0x20 +#define M_LEON 0x30 #define SM_SUN4C 0x50 #define SM_SUN4M 0x70 #define SM_SUN4M_OBP 0x80 @@ -41,6 +42,9 @@ struct Sun_Machine_Models { #define SM_4_330 0x03 /* Sun 4/300 series */ #define SM_4_470 0x04 /* Sun 4/400 series */ +/* Leon machines */ +#define M_LEON3_SOC 0x02 /* Leon3 SoC */ + /* Sun4c machines Full Name - PROM NAME */ #define SM_4C_SS1 0x01 /* Sun4c SparcStation 1 - Sun 4/60 */ #define SM_4C_IPC 0x02 /* Sun4c SparcStation IPC - Sun 4/40 */ diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 808555fc1d5..1407c07bdad 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -267,6 +267,7 @@ static inline void srmmu_flush_tlb_page(unsigned long page) } +#ifndef CONFIG_SPARC_LEON static inline unsigned long srmmu_hwprobe(unsigned long vaddr) { unsigned long retval; @@ -278,6 +279,9 @@ static inline unsigned long srmmu_hwprobe(unsigned long vaddr) return retval; } +#else +#define srmmu_hwprobe(addr) (srmmu_swprobe(addr, 0) & SRMMU_PTE_PMASK) +#endif static inline int srmmu_get_pte (unsigned long addr) diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index be8d7aaeb60..82a190d7efc 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -118,5 +118,8 @@ extern struct device_node *of_console_device; extern char *of_console_path; extern char *of_console_options; +extern void (*prom_build_more)(struct device_node *dp, struct device_node ***nextp); +extern char *build_full_name(struct device_node *dp); + #endif /* __KERNEL__ */ #endif /* _SPARC_PROM_H */ diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h index 751c8c17f5a..890036b3689 100644 --- a/arch/sparc/include/asm/system_32.h +++ b/arch/sparc/include/asm/system_32.h @@ -32,6 +32,7 @@ enum sparc_cpu { sun4u = 0x05, /* V8 ploos ploos */ sun_unknown = 0x06, ap1000 = 0x07, /* almost a sun4m */ + sparc_leon = 0x08, /* Leon SoC */ }; /* Really, userland should not be looking at any of this... 
*/ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 475ce4696ac..b0041756c9f 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -41,6 +41,8 @@ obj-y += of_device_common.o obj-y += of_device_$(BITS).o obj-$(CONFIG_SPARC64) += prom_irqtrans.o +obj-$(CONFIG_SPARC_LEON)+= leon_kernel.o + obj-$(CONFIG_SPARC64) += reboot.o obj-$(CONFIG_SPARC64) += sysfs.o obj-$(CONFIG_SPARC64) += iommu.o diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index d85c3dc4953..1446df90ef8 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -312,7 +312,12 @@ void __cpuinit cpu_probe(void) psr = get_psr(); put_psr(psr | PSR_EF); +#ifdef CONFIG_SPARC_LEON + fpu_vers = 7; +#else fpu_vers = ((get_fsr() >> 17) & 0x7); +#endif + put_psr(psr); set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers); diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 6b4d8acc4c8..439d82a95ac 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -809,6 +809,11 @@ found_version: nop got_prop: +#ifdef CONFIG_SPARC_LEON + /* no cpu-type check is needed, it is a SPARC-LEON */ + ba sun4c_continue_boot + nop +#endif set cputypval, %o2 ldub [%o2 + 0x4], %l1 diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c index 57922f69c3f..52a15fe2db1 100644 --- a/arch/sparc/kernel/idprom.c +++ b/arch/sparc/kernel/idprom.c @@ -31,6 +31,8 @@ static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { { .name = "Sun 4/200 Series", .id_machtype = (SM_SUN4 | SM_4_260) }, { .name = "Sun 4/300 Series", .id_machtype = (SM_SUN4 | SM_4_330) }, { .name = "Sun 4/400 Series", .id_machtype = (SM_SUN4 | SM_4_470) }, +/* Now Leon */ +{ .name = "Leon3 System-on-a-Chip", .id_machtype = (M_LEON | M_LEON3_SOC) }, /* Now, Sun4c's */ { .name = "Sun4c SparcStation 1", .id_machtype = (SM_SUN4C | SM_4C_SS1) }, { .name = "Sun4c SparcStation IPC", .id_machtype = (SM_SUN4C | SM_4C_IPC) }, diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index ad800b80c71..e1af4372832 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kernel.h" #include "irq.h" @@ -661,6 +662,10 @@ void __init init_IRQ(void) sun4d_init_IRQ(); break; + case sparc_leon: + leon_init_IRQ(); + break; + default: prom_printf("Cannot initialize IRQs on this Sun machine..."); break; diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 998cadb4e7f..16a47ffe03c 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -235,6 +235,8 @@ void __init setup_arch(char **cmdline_p) sparc_cpu_model = sun4e; if (!strcmp(&cputypval,"sun4u")) sparc_cpu_model = sun4u; + if (!strncmp(&cputypval, "leon" , 4)) + sparc_cpu_model = sparc_leon; printk("ARCH: "); switch(sparc_cpu_model) { @@ -256,6 +258,9 @@ void __init setup_arch(char **cmdline_p) case sun4u: printk("SUN4U\n"); break; + case sparc_leon: + printk("LEON\n"); + break; default: printk("UNKNOWN!\n"); break; diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 681abe0a459..79836a7dd00 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_SPARC32) += loadmmu.o obj-y += generic_$(BITS).o obj-$(CONFIG_SPARC32) += extable.o btfixup.o srmmu.o iommu.o io-unit.o obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o +obj-$(CONFIG_SPARC_LEON)+= leon_mm.o # Only used by sparc64 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git 
a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 26bb3919ff1..54114ad0bde 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -34,6 +34,7 @@ #include /* bug in asm-generic/tlb.h: check_pgt_cache */ #include #include +#include DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -326,6 +327,9 @@ void __init paging_init(void) sparc_unmapped_base = 0xe0000000; BTFIXUPSET_SETHI(sparc_unmapped_base, 0xe0000000); break; + case sparc_leon: + leon_init(); + /* fall through */ case sun4m: case sun4d: srmmu_paging_init(); diff --git a/arch/sparc/mm/loadmmu.c b/arch/sparc/mm/loadmmu.c index 652be05acbe..82ec8f66603 100644 --- a/arch/sparc/mm/loadmmu.c +++ b/arch/sparc/mm/loadmmu.c @@ -33,6 +33,7 @@ void __init load_mmu(void) break; case sun4m: case sun4d: + case sparc_leon: ld_mmu_srmmu(); break; default: -- cgit v1.2.3 From e63829de3d03f92cea2b26119e0aa9a7043b9913 Mon Sep 17 00:00:00 2001 From: Konrad Eisele Date: Mon, 17 Aug 2009 00:13:32 +0000 Subject: sparc,leon: Added support for AMBAPP bus. The device is an AMBA bus if it is a child of prom node "ambapp" (AMBA plug and play). Two functions leon_trans_init() and leon_node_init() (defined in sparc/kernel/leon_kernel.c) are called in the prom_build_tree() path if CONFIG_SPARC_LEON is defined. leon_node_init() will build up the device tree using AMBA plug and play. Also, an extra check was added to prom_common.c:build_one_prop() in case a prom node is undefined, which can happen for SPARC-LEON because it creates only a minimal set of nodes to emulate sparc behaviour. Signed-off-by: Konrad Eisele Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/of_device_32.c | 40 ++++++++++++++++++++++++++++++++++++++++ arch/sparc/kernel/prom_32.c | 33 +++++++++++++++++++++++++++++++++ arch/sparc/kernel/prom_common.c | 10 ++++++++-- 3 files changed, 81 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 90396702ea2..4c26eb59e74 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "of_device_common.h" @@ -97,6 +99,35 @@ static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags) return IORESOURCE_MEM; } + /* + * AMBAPP bus specific translator + */ + +static int of_bus_ambapp_match(struct device_node *np) +{ + return !strcmp(np->name, "ambapp"); +} + +static void of_bus_ambapp_count_cells(struct device_node *child, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 1; + if (sizec) + *sizec = 1; +} + +static int of_bus_ambapp_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + return of_bus_default_map(addr, range, na, ns, pna); +} + +static unsigned long of_bus_ambapp_get_flags(const u32 *addr, + unsigned long flags) +{ + return IORESOURCE_MEM; +} /* * Array of bus specific translators @@ -121,6 +152,15 @@ static struct of_bus of_busses[] = { .map = of_bus_default_map, .get_flags = of_bus_sbus_get_flags, }, + /* AMBA */ + { + .name = "ambapp", + .addr_prop_name = "reg", + .match = of_bus_ambapp_match, + .count_cells = of_bus_ambapp_count_cells, + .map = of_bus_ambapp_map, + .get_flags = of_bus_ambapp_get_flags, + }, /* Default */ { .name = "default", diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index fe43e80772d..0a37e8cfd16 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "prom.h" @@
-131,6 +133,35 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs->which_io, regs->phys_addr); } +/* "name:vendor:device@irq,addrlo" */ +static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) +{ + struct amba_prom_registers *regs; unsigned int *intr; + unsigned int *device, *vendor; + struct property *prop; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + regs = prop->value; + prop = of_find_property(dp, "interrupts", NULL); + if (!prop) + return; + intr = prop->value; + prop = of_find_property(dp, "vendor", NULL); + if (!prop) + return; + vendor = prop->value; + prop = of_find_property(dp, "device", NULL); + if (!prop) + return; + device = prop->value; + + sprintf(tmp_buf, "%s:%d:%d@%x,%x", + dp->name, *vendor, *device, + *intr, regs->phys_addr); +} + static void __init __build_path_component(struct device_node *dp, char *tmp_buf) { struct device_node *parent = dp->parent; @@ -143,6 +174,8 @@ static void __init __build_path_component(struct device_node *dp, char *tmp_buf) return sbus_path_component(dp, tmp_buf); if (!strcmp(parent->type, "ebus")) return ebus_path_component(dp, tmp_buf); + if (!strcmp(parent->type, "ambapp")) + return ambapp_path_component(dp, tmp_buf); /* "isa" is handled with platform naming */ } diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 0fb5789d43c..138910c6720 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -22,9 +22,12 @@ #include #include #include +#include #include "prom.h" +void (*prom_build_more)(struct device_node *dp, struct device_node ***nextp); + struct device_node *of_console_device; EXPORT_SYMBOL(of_console_device); @@ -161,7 +164,7 @@ static struct property * __init build_one_prop(phandle node, char *prev, name = prom_nextprop(node, prev, p->name); } - if (strlen(name) == 0) { + if (!name || strlen(name) == 0) { tmp = p; return NULL; } @@ -242,7 +245,7 @@ static struct device_node * __init prom_create_node(phandle node, return dp; } -static char * __init build_full_name(struct device_node *dp) +char * __init build_full_name(struct device_node *dp) { int len, ourlen, plen; char *n; @@ -289,6 +292,9 @@ static struct device_node * __init prom_build_tree(struct device_node *parent, dp->child = prom_build_tree(dp, prom_getchild(node), nextp); + if (prom_build_more) + prom_build_more(dp, nextp); + node = prom_getsibling(node); } -- cgit v1.2.3 From 75d9e34698540e96b422293e1d76ab02cc7faefb Mon Sep 17 00:00:00 2001 From: Konrad Eisele Date: Mon, 17 Aug 2009 00:13:33 +0000 Subject: sparc, leon: sparc-leon specific SRMMU initialization and bootup fixes. The sparc-leon caches are virtually tagged so a flush is needed on ctx switch. Signed-off-by: Konrad Eisele Reviewed-by: Sam Ravnborg Signed-off-by: David S. 
Miller --- arch/sparc/mm/srmmu.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index ade4eb373bd..509b1ffeba6 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -569,6 +570,9 @@ static void srmmu_switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd); } + if (sparc_cpu_model == sparc_leon) + leon_switch_mm(); + if (is_hypersparc) hyper_flush_whole_icache(); @@ -1977,6 +1981,45 @@ static void __init init_viking(void) poke_srmmu = poke_viking; } +#ifdef CONFIG_SPARC_LEON + +void __init poke_leonsparc(void) +{ +} + +void __init init_leon(void) +{ + + srmmu_name = "Leon"; + + BTFIXUPSET_CALL(flush_cache_all, leon_flush_cache_all, + BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_mm, leon_flush_cache_all, + BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_page, leon_flush_pcache_all, + BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_cache_range, leon_flush_cache_all, + BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_page_for_dma, leon_flush_dcache_all, + BTFIXUPCALL_NORM); + + BTFIXUPSET_CALL(flush_tlb_all, leon_flush_tlb_all, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_tlb_mm, leon_flush_tlb_all, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_tlb_page, leon_flush_tlb_all, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(flush_tlb_range, leon_flush_tlb_all, BTFIXUPCALL_NORM); + + BTFIXUPSET_CALL(__flush_page_to_ram, leon_flush_cache_all, + BTFIXUPCALL_NOP); + BTFIXUPSET_CALL(flush_sig_insns, leon_flush_cache_all, BTFIXUPCALL_NOP); + + poke_srmmu = poke_leonsparc; + + srmmu_cache_pagetables = 0; + + leon_flush_during_switch = leon_flush_needed(); +} +#endif + /* Probe for the srmmu chip version. */ static void __init get_srmmu_type(void) { @@ -1992,7 +2035,15 @@ static void __init get_srmmu_type(void) psr_typ = (psr >> 28) & 0xf; psr_vers = (psr >> 24) & 0xf; - /* First, check for HyperSparc or Cypress. */ + /* First, check for sparc-leon. */ + if (sparc_cpu_model == sparc_leon) { + psr_typ = 0xf; /* hardcoded ids for older models/simulators */ + psr_vers = 2; + init_leon(); + return; + } + + /* Second, check for HyperSparc or Cypress. */ if(mod_typ == 1) { switch(mod_rev) { case 7: -- cgit v1.2.3 From 1660e9d3d04b6c636b7171bf6c08ac7b82a7de79 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 17 Aug 2009 14:36:32 +1000 Subject: powerpc/32: Always order writes to halves of 64-bit PTEs On 32-bit systems with 64-bit PTEs, the PTEs have to be written in two 32-bit halves. On SMP we write the higher-order half and then the lower-order half, with a write barrier between the two halves, but on UP there was no particular ordering of the writes to the two halves. This extends the ordering that we already do on SMP to the UP case as well. The reason is that with the perf_counter subsystem potentially accessing user memory at interrupt time to get stack traces, we have to be careful not to create an incorrect but apparently valid PTE even on UP. 
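A minimal sketch of the ordering this relies on (illustrative only; set_pte_64bit and the barrier choice are assumptions for the sketch, not the kernel's exact code, which performs the two 32-bit stores with an eieio between them):

static inline void set_pte_64bit(u32 *ptep, u64 pte)
{
        /* high word first: the entry is not yet valid to a concurrent walker */
        ptep[0] = (u32)(pte >> 32);
        /* order the two stores even on UP (eieio on powerpc) */
        wmb();
        /* low word, which carries the present/valid bits, goes last */
        ptep[1] = (u32)pte;
}

With this ordering, an interrupt-time reader such as the perf_counter stack walker sees either the old PTE or the new one, but never a half-written combination that looks valid.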
Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index eb17da78112..2a5da069714 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, else pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) - /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we * can just store as long as we do the two halves in the right order * with a barrier in between. This is possible because we take care, * in the hash code, to pre-invalidate if the PTE was already hashed, @@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #else /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 64-bit PTEs in UP mode + * cases, and 32-bit non-hash with 32-bit PTEs. */ *ptep = pte; #endif -- cgit v1.2.3 From 9c1e105238c474d19905af504f2e7f42d4f71f9e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 17 Aug 2009 15:17:54 +1000 Subject: powerpc: Allow perf_counters to access user memory at interrupt time This provides a mechanism to allow the perf_counters code to access user memory in a PMU interrupt routine. Such an access can cause various kinds of interrupt: SLB miss, MMU hash table miss, segment table miss, or TLB miss, depending on the processor. This commit only deals with 64-bit classic/server processors, which use an MMU hash table. 32-bit processors are already able to access user memory at interrupt time. Since we don't soft-disable on 32-bit, we avoid the possibility of reentering hash_page or the TLB miss handlers, since they run with interrupts disabled. On 64-bit processors, an SLB miss interrupt on a user address will update the slb_cache and slb_cache_ptr fields in the paca. This is OK except in the case where a PMU interrupt occurs in switch_slb, which also accesses those fields. To prevent this, we hard-disable interrupts in switch_slb. Interrupts are already soft-disabled at this point, and will get hard-enabled when they get soft-enabled later. This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice, and to make sure that it clears the slb_cache_ptr when called from other callers than switch_slb, the existing routine is renamed to __slb_flush_and_rebolt, which is called by switch_slb and the new version of slb_flush_and_rebolt. Similarly, switch_stab (used on POWER3 and RS64 processors) gets a hard_irq_disable() to protect the per-cpu variables used there and in ste_allocate. If a MMU hashtable miss interrupt occurs, normally we would call hash_page to look up the Linux PTE for the address and create a HPTE. However, hash_page is fairly complex and takes some locks, so to avoid the possibility of deadlock, we check the preemption count to see if we are in a (pseudo-)NMI handler, and if so, we don't call hash_page but instead treat it like a bad access that will get reported up through the exception table mechanism. 
An interrupt whose handler runs even though the interrupt occurred when soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI handler, which should use nmi_enter()/nmi_exit() rather than irq_enter()/irq_exit(). Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kernel/exceptions-64s.S | 19 ++++++++++++++++++ arch/powerpc/mm/slb.c | 37 +++++++++++++++++++++++++----------- arch/powerpc/mm/stab.c | 11 ++++++++++- 4 files changed, 57 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b6465231..197b15646ee 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -67,6 +67,8 @@ int main(void) DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e57..8ac85e08ffa 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -729,6 +729,11 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + clrrdi r11,r1,THREAD_SHIFT + lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ + andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ + bne 77f /* then don't call hash_page now */ + /* * On iSeries, we soft-disable interrupts here, then * hard-enable interrupts so that the hash_page code can spin on @@ -833,6 +838,20 @@ handle_page_fault: bl .low_hash_fault b .ret_from_except +/* + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ +77: bl .save_nvgprs + mr r4,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + li r5,SIGSEGV + bl .bad_page_fault + b .ret_from_except + /* here we have a segment miss */ do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5b7038f248b..a685652effe 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } -void slb_flush_and_rebolt(void) +static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. */ unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data, ksp_vsid_data; - WARN_ON(!irqs_disabled()); - linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; @@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; } - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. 
*/ asm volatile("isync\n" @@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) : "memory"); } +void slb_flush_and_rebolt(void) +{ + + WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; +} + void slb_vmalloc_update(void) { unsigned long vflags; @@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long offset; unsigned long slbie_data = 0; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an SLB miss + * which would update the slb_cache/slb_cache_ptr fields in the PACA. + */ + hard_irq_disable(); + offset = get_paca()->slb_cache_ptr; if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; @@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) } asm volatile("isync" : : : "memory"); } else { - slb_flush_and_rebolt(); + __slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 98cd1dc2ae7..ab5fb48b3e9 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long offset; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; @@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an STAB miss + * which would update the stab_cache/stab_cache_ptr per-cpu variables. + */ + hard_irq_disable(); + + offset = __get_cpu_var(stab_cache_ptr); if (offset <= NR_STAB_CACHE_ENTRIES) { int i; -- cgit v1.2.3 From 20002ded4d937ca87aca6253b874920a96a763c4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 18 Aug 2009 08:25:32 +1000 Subject: perf_counter: powerpc: Add callchain support This adds support for tracing callchains for powerpc, both 32-bit and 64-bit, and both in the kernel and userspace, from PMU interrupt context. The first three entries stored for each callchain are the NIP (next instruction pointer), LR (link register), and the contents of the LR save area in the second stack frame (the first is ignored because the ABI convention on powerpc is that functions save their return address in their caller's stack frame). Because leaf functions don't have to save their return address (LR value) and don't have to establish a stack frame, it's possible for either or both of LR and the second stack frame's LR save area to have valid return addresses in them. 
This is basically impossible to disambiguate without either reading the code or looking at auxiliary information such as CFI tables. Since we don't want to do either of those things at interrupt time, we store both LR and the second stack frame's LR save area. Once we get past the second stack frame, there is no ambiguity; all return addresses we get are reliable. For kernel traces, we check whether they are valid kernel instruction addresses and store zero instead if they are not (rather than omitting them, which would make it impossible for userspace to know which was which). We also store zero instead of the second stack frame's LR save area value if it is the same as LR. For kernel traces, we check for interrupt frames, and for user traces, we check for signal frames. In each case, since we're starting a new trace, we store a PERF_CONTEXT_KERNEL/USER marker so that userspace knows that the next three entries are NIP, LR and the second stack frame for the interrupted context. We read user memory with __get_user_inatomic. On 64-bit, if this PMU interrupt occurred while interrupts are soft-disabled, and there is no MMU hash table entry for the page, we will get an -EFAULT return from __get_user_inatomic even if there is a valid Linux PTE for the page, since hash_page isn't reentrant. Thus we have code here to read the Linux PTE and access the page via the kernel linear mapping. Since 64-bit doesn't use (or need) highmem there is no need to do kmap_atomic. On 32-bit, we don't do soft interrupt disabling, so this complication doesn't occur and there is no need to fall back to reading the Linux PTE, since hash_page (or the TLB miss handler) will get called automatically if necessary. Note that we cannot get PMU interrupts in the interval during context switch between switch_mm (which switches the user address space) and switch_to (which actually changes current to the new process). On 64-bit this is because interrupts are hard-disabled in switch_mm and stay hard-disabled until they are soft-enabled later, after switch_to has returned. So there is no possibility of trying to do a user stack trace when the user address space is not current's address space. Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/perf_callchain.c | 527 +++++++++++++++++++++++++++++++++++ 2 files changed, 528 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/perf_callchain.c (limited to 'arch') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b73396b9390..9619285f64e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c new file mode 100644 index 00000000000..f74b62c6751 --- /dev/null +++ b/arch/powerpc/kernel/perf_callchain.c @@ -0,0 +1,527 @@ +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC64 +#include "ppc32.h" +#endif + +/* + * Store another value in a callchain_entry. + */ +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + unsigned int nr = entry->nr; + + if (nr < PERF_MAX_STACK_DEPTH) { + entry->ip[nr] = ip; + entry->nr = nr + 1; + } +} + +/* + * Is sp valid as the address of the next kernel stack frame after prev_sp? + * The next frame may be in a different stack area but should not go + * back down in the same stack area. + */ +static int valid_next_sp(unsigned long sp, unsigned long prev_sp) +{ + if (sp & 0xf) + return 0; /* must be 16-byte aligned */ + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return 0; + if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + return 1; + /* + * sp could decrease when we jump off an interrupt stack + * back to the regular process stack. + */ + if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) + return 1; + return 0; +} + +static void perf_callchain_kernel(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + unsigned long *fp; + + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->nip); + + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return; + + for (;;) { + fp = (unsigned long *) sp; + next_sp = fp[0]; + + if (next_sp == sp + STACK_INT_FRAME_SIZE && + fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + /* + * This looks like an interrupt frame for an + * interrupt that occurred in the kernel + */ + regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + next_ip = regs->nip; + lr = regs->link; + level = 0; + callchain_store(entry, PERF_CONTEXT_KERNEL); + + } else { + if (level == 0) + next_ip = lr; + else + next_ip = fp[STACK_FRAME_LR_SAVE]; + + /* + * We can't tell which of the first two addresses + * we get are valid, but we can filter out the + * obviously bogus ones here. We replace them + * with 0 rather than removing them entirely so + * that userspace can tell which is which. + */ + if ((level == 1 && next_ip == lr) || + (level <= 1 && !kernel_text_address(next_ip))) + next_ip = 0; + + ++level; + } + + callchain_store(entry, next_ip); + if (!valid_next_sp(next_sp, sp)) + return; + sp = next_sp; + } +} + +#ifdef CONFIG_PPC64 + +#ifdef CONFIG_HUGETLB_PAGE +#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) +#else +#define is_huge_psize(pagesize) 0 +#endif + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. 
+ */ +static int read_user_stack_slow(void __user *ptr, void *ret, int nb) +{ + pgd_t *pgdir; + pte_t *ptep, pte; + int pagesize; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + pagesize = get_slice_psize(current->mm, addr); + + /* align address to page boundary */ + offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + addr -= offset; + + if (is_huge_psize(pagesize)) + ptep = huge_pte_offset(current->mm, addr); + else + ptep = find_linux_pte(pgdir, addr); + + if (ptep == NULL) + return -EFAULT; + pte = *ptep; + if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) + return -EFAULT; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + return -EFAULT; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(ret, kaddr + offset, nb); + return 0; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 4); +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) + return 0; + return 1; +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. 
+ */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +static void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + for (;;) { + fp = (unsigned long __user *) sp; + if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like a signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +static inline int current_is_64bit(void) +{ + /* + * We can't use test_thread_flag() here because we may be on an + * interrupt stack, and the thread flags don't get copied over + * from the thread_info on the main stack to the interrupt stack. + */ + return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); +} + +#else /* CONFIG_PPC64 */ +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault.
+ */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + return __get_user_inatomic(*ret, ptr); +} + +static inline void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ +} + +static inline int current_is_64bit(void) +{ + return 0; +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > TASK_SIZE - 32) + return 0; + return 1; +} + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. 
+ */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +static void perf_callchain_user_32(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + fp = (unsigned int __user *) (unsigned long) sp; + if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like a signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +/* + * Since we can't get PMU interrupts inside a PMU interrupt handler, + * we don't need separate irq and nmi entries here. + */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + + if (current->pid == 0) /* idle task? */ + return entry; + + if (!user_mode(regs)) { + perf_callchain_kernel(regs, entry); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + if (current_is_64bit()) + perf_callchain_user_64(regs, entry); + else + perf_callchain_user_32(regs, entry); + } + + return entry; +} -- cgit v1.2.3 From 0914b93f4f36d1a0233bd64da9a35e37e72304ec Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 18 Aug 2009 21:56:19 +0900 Subject: ASoC: Fix data format configuration for S3C64XX IISv2 The data format configuration for S3C64xx IISv2 was hardcoded for the IISMOD register. This patch changes it to use the defined values. And instead of bits 9 and 10 of IISMOD we should clear bits 13 and 14.
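As a hedged illustration of what using the defined values means in practice (the regs base pointer and the use of readl/writel here are assumptions for the sketch; the BLC defines come from the patch below):

        u32 iismod = readl(regs + S3C2412_IISMOD);

        iismod &= ~S3C64XX_IISMOD_BLC_MASK;     /* clears bits 14:13 */
        iismod |= S3C64XX_IISMOD_BLC_16BIT;     /* 16-bit samples */
        writel(iismod, regs + S3C2412_IISMOD);

The mask covers exactly the bit range named in the message: the BLC field lives at bits 13 and 14 of IISMOD, not at bits 9 and 10.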
Signed-off-by: Joonyoung Shim Signed-off-by: Mark Brown --- arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h index 0fad7571030..07659dad174 100644 --- a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h +++ b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h @@ -33,6 +33,11 @@ #define S3C2412_IISCON_RXDMA_ACTIVE (1 << 1) #define S3C2412_IISCON_IIS_ACTIVE (1 << 0) +#define S3C64XX_IISMOD_BLC_16BIT (0 << 13) +#define S3C64XX_IISMOD_BLC_8BIT (1 << 13) +#define S3C64XX_IISMOD_BLC_24BIT (2 << 13) +#define S3C64XX_IISMOD_BLC_MASK (3 << 13) + #define S3C64XX_IISMOD_IMS_PCLK (0 << 10) #define S3C64XX_IISMOD_IMS_SYSMUX (1 << 10) -- cgit v1.2.3 From 5e9ad7df9fd056f1071af8aa91034a1c3170257d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 18 Aug 2009 10:41:57 +0200 Subject: [S390] ftrace: update system call tracer support Commit fb34a08c3 ("tracing: Add trace events for each syscall entry/exit") changed the lowlevel API to ftrace syscall tracing but did not update s390 which started making use of it recently. This broke the s390 build, as reported by Paul Mundt. Update the callbacks with the syscall number and the syscall return code values. This allows per syscall tracepoints, syscall argument enumeration in /debug/tracing/events/syscalls/ and perfcounters support and integration on s390 too. Reported-by: Paul Mundt Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/s390/kernel/ptrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 43acd73105b..c5e87d891ca 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,6 +51,9 @@ #include "compat_ptrace.h" #endif +DEFINE_TRACE(syscall_enter); +DEFINE_TRACE(syscall_exit); + enum s390_regset { REGSET_GENERAL, REGSET_FP, @@ -662,7 +665,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) } if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_enter(regs); + trace_syscall_enter(regs, regs->gprs[2]); if (unlikely(current->audit_context)) audit_syscall_entry(is_compat_task() ? @@ -680,7 +683,7 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) regs->gprs[2]); if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_exit(regs); + trace_syscall_exit(regs, regs->gprs[2]); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); -- cgit v1.2.3 From 9abea08e43c6cfc18399e42cbc6028d111532f61 Mon Sep 17 00:00:00 2001 From: Eero Nurkkala Date: Thu, 20 Aug 2009 16:18:07 +0300 Subject: OMAP: McBSP: Provide functions for ASoC frame synchronization ASoC has an annoying bug letting either the L or R channel be played on the L channel. In other words, L and R channels can switch at random. This provides McBSP functionality that may be used to fix this behaviour.
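As a rough usage sketch (the callback shape and the id value are assumptions; only omap_mcbsp_xmit_enable/omap_mcbsp_recv_enable come from the patch below):

static void playback_trigger(unsigned int id, int cmd)
{
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_START:
                /* lift XDISABLE only once DMA is running, so both
                 * channels start on the same frame sync */
                omap_mcbsp_xmit_enable(id, 1);
                break;
        case SNDRV_PCM_TRIGGER_STOP:
                omap_mcbsp_xmit_enable(id, 0);
                break;
        }
}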
Signed-off-by: Eero Nurkkala Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/include/mach/mcbsp.h | 2 ++ arch/arm/plat-omap/mcbsp.c | 52 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index 57249bb1e9b..b85e6e600ed 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -389,6 +389,8 @@ int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); void omap_mcbsp_start(unsigned int id, int tx, int rx); void omap_mcbsp_stop(unsigned int id, int tx, int rx); +void omap_mcbsp_xmit_enable(unsigned int id, u8 enable); +void omap_mcbsp_recv_enable(unsigned int id, u8 enable); void omap_mcbsp_xmit_word(unsigned int id, u32 word); u32 omap_mcbsp_recv_word(unsigned int id); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index a3d2313460b..0aa2524186f 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -412,6 +412,58 @@ void omap_mcbsp_stop(unsigned int id, int tx, int rx) } EXPORT_SYMBOL(omap_mcbsp_stop); +void omap_mcbsp_xmit_enable(unsigned int id, u8 enable) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + u16 w; + + if (!(cpu_is_omap2430() || cpu_is_omap34xx())) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + w = OMAP_MCBSP_READ(io_base, XCCR); + + if (enable) + OMAP_MCBSP_WRITE(io_base, XCCR, w & ~(XDISABLE)); + else + OMAP_MCBSP_WRITE(io_base, XCCR, w | XDISABLE); +} +EXPORT_SYMBOL(omap_mcbsp_xmit_enable); + +void omap_mcbsp_recv_enable(unsigned int id, u8 enable) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + u16 w; + + if (!(cpu_is_omap2430() || cpu_is_omap34xx())) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + w = OMAP_MCBSP_READ(io_base, RCCR); + + if (enable) + OMAP_MCBSP_WRITE(io_base, RCCR, w & ~(RDISABLE)); + else + OMAP_MCBSP_WRITE(io_base, RCCR, w | RDISABLE); +} +EXPORT_SYMBOL(omap_mcbsp_recv_enable); + /* polled mcbsp i/o operations */ int omap_mcbsp_pollwrite(unsigned int id, u16 buf) { -- cgit v1.2.3 From 946a49a95dabc9dd10344ae9ab4db9f14c5ad502 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:08 +0300 Subject: OMAP: McBSP: Add IRQEN, IRQSTATUS, THRESHOLD2 and THRESHOLD1 registers. Adding McBSP register definition for IRQEN, IRQSTATUS, THRESHOLD2 and THRESHOLD1 registers. 
Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/include/mach/mcbsp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index b85e6e600ed..ceaf9eee61b 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -134,6 +134,10 @@ #define OMAP_MCBSP_REG_XCERG 0x74 #define OMAP_MCBSP_REG_XCERH 0x78 #define OMAP_MCBSP_REG_SYSCON 0x8C +#define OMAP_MCBSP_REG_THRSH2 0x90 +#define OMAP_MCBSP_REG_THRSH1 0x94 +#define OMAP_MCBSP_REG_IRQST 0xA0 +#define OMAP_MCBSP_REG_IRQEN 0xA4 #define OMAP_MCBSP_REG_XCCR 0xAC #define OMAP_MCBSP_REG_RCCR 0xB0 -- cgit v1.2.3 From 44a6311c0a83f682bcf18fae389a1b270df29314 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:09 +0300 Subject: OMAP: McBSP: Use appropriate value for startup delay Increase the startup delay value for the worst case: CLKSRG*2 = 8000khz: (1/8000) * 2 * 2 usec. Although 100us may give enough time for two CLKSRG cycles, due to some unknown PM related, clock gating etc. reason, this patch increases it to 500us. Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/mcbsp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 0aa2524186f..e9dd70320f7 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -365,7 +365,13 @@ void omap_mcbsp_start(unsigned int id, int tx, int rx) w = OMAP_MCBSP_READ(io_base, SPCR1); OMAP_MCBSP_WRITE(io_base, SPCR1, w | (rx & 1)); - udelay(100); + /* + * Worst case: CLKSRG*2 = 8000khz: (1/8000) * 2 * 2 usec + * REVISIT: 100us may give enough time for two CLKSRG, however + * due to some unknown PM related, clock gating etc. reason it + * is now at 500us. + */ + udelay(500); if (idle) { /* Start frame sync */ -- cgit v1.2.3 From 7aa9ff56cae7a6a4fa2e1a503cc5f8bbd887d6e3 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:10 +0300 Subject: OMAP: McBSP: Add transmit/receive threshold handler This patch adds a way to handle the transmit/receive threshold. It exports a callback registration procedure to mcbsp users.
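A short hedged sketch of the intended call order (the id and threshold values are made up for illustration; the setter comes from the patch below):

        unsigned int id = 1;            /* McBSP2, assumed */

        /* program the FIFO threshold before the transmitter starts */
        omap_mcbsp_set_tx_threshold(id, 32);
        omap_mcbsp_start(id, 1, 0);     /* tx only */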
Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/include/mach/mcbsp.h | 9 ++++ arch/arm/plat-omap/mcbsp.c | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index ceaf9eee61b..2544aa5bccd 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -389,6 +389,15 @@ int omap_mcbsp_init(void); void omap_mcbsp_register_board_cfg(struct omap_mcbsp_platform_data *config, int size); void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg * config); +#ifdef CONFIG_ARCH_OMAP34XX +void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold); +void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold); +#else +static inline void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) +{ } +static inline void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) +{ } +#endif int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); void omap_mcbsp_start(unsigned int id, int tx, int rx); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index e9dd70320f7..081f2eace92 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -198,6 +198,56 @@ void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg *config) } EXPORT_SYMBOL(omap_mcbsp_config); +#ifdef CONFIG_ARCH_OMAP34XX +/* + * omap_mcbsp_set_tx_threshold configures how to deal + * with the transmit threshold. The threshold value and handler can be + * configured here. + */ +void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + + if (!cpu_is_omap34xx()) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + OMAP_MCBSP_WRITE(io_base, THRSH2, threshold); +} +EXPORT_SYMBOL(omap_mcbsp_set_tx_threshold); + +/* + * omap_mcbsp_set_rx_threshold configures how to deal + * with the receive threshold. The threshold value and handler can be + * configured here. + */ +void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + + if (!cpu_is_omap34xx()) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + OMAP_MCBSP_WRITE(io_base, THRSH1, threshold); +} +EXPORT_SYMBOL(omap_mcbsp_set_rx_threshold); +#endif + /* * We can choose between IRQ based or polled IO. * This needs to be called before omap_mcbsp_request(). -- cgit v1.2.3 From a1a56f5faa41327116bf960a8e79f21a8ea35dce Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:11 +0300 Subject: OMAP: McBSP: Create and export max_(r|t)x_thres property This patch exports through sysfs two properties to configure the maximum threshold for transmission and reception on each mcbsp instance. Also, it exports two helper functions to allow mcbsp users to read these values.
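A hedged sketch of using the helper functions named above (the caller and the requested value are assumptions; the getters are added by the patch below):

        u16 requested = 64;     /* from runtime configuration, assumed */
        u16 max = omap_mcbsp_get_max_tx_threshold(id);
        u16 thres = min(requested, max);        /* never exceed the advertised max */

        omap_mcbsp_set_tx_threshold(id, thres);

The same limits are visible from user space through the max_tx_thres and max_rx_thres attributes on the McBSP platform device.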
Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/mach-omap2/mcbsp.c | 5 ++ arch/arm/plat-omap/include/mach/mcbsp.h | 11 +++ arch/arm/plat-omap/mcbsp.c | 122 ++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index a5c0f0435cd..f837114d1d6 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -129,6 +129,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x7F, }, { .phys_base = OMAP34XX_MCBSP2_BASE, @@ -137,6 +138,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP2_IRQ_RX, .tx_irq = INT_24XX_MCBSP2_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x3FF, }, { .phys_base = OMAP34XX_MCBSP3_BASE, @@ -145,6 +147,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP3_IRQ_RX, .tx_irq = INT_24XX_MCBSP3_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x7F, }, { .phys_base = OMAP34XX_MCBSP4_BASE, @@ -153,6 +156,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP4_IRQ_RX, .tx_irq = INT_24XX_MCBSP4_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x7F, }, { .phys_base = OMAP34XX_MCBSP5_BASE, @@ -161,6 +165,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP5_IRQ_RX, .tx_irq = INT_24XX_MCBSP5_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x7F, }, }; #define OMAP34XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap34xx_mcbsp_pdata) diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index 2544aa5bccd..832330d7cb3 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -348,6 +348,9 @@ struct omap_mcbsp_platform_data { u8 dma_rx_sync, dma_tx_sync; u16 rx_irq, tx_irq; struct omap_mcbsp_ops *ops; +#ifdef CONFIG_ARCH_OMAP34XX + u16 buffer_size; +#endif }; struct omap_mcbsp { @@ -381,6 +384,10 @@ struct omap_mcbsp { struct omap_mcbsp_platform_data *pdata; struct clk *iclk; struct clk *fclk; +#ifdef CONFIG_ARCH_OMAP34XX + u16 max_tx_thres; + u16 max_rx_thres; +#endif }; extern struct omap_mcbsp **mcbsp_ptr; extern int omap_mcbsp_count; @@ -392,11 +399,15 @@ void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg * config #ifdef CONFIG_ARCH_OMAP34XX void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold); void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold); +u16 omap_mcbsp_get_max_tx_threshold(unsigned int id); +u16 omap_mcbsp_get_max_rx_threshold(unsigned int id); #else static inline void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) { } static inline void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) { } +static inline u16 omap_mcbsp_get_max_tx_threshold(unsigned int id) { return 0; } +static inline u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) { return 0; } #endif int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 081f2eace92..0bbc912e548 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -246,6 +246,42 @@ void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) OMAP_MCBSP_WRITE(io_base, THRSH1, threshold); } 
EXPORT_SYMBOL(omap_mcbsp_set_rx_threshold); + +/* + * omap_mcbsp_get_max_tx_thres just return the current configured + * maximum threshold for transmission + */ +u16 omap_mcbsp_get_max_tx_threshold(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + return mcbsp->max_tx_thres; +} +EXPORT_SYMBOL(omap_mcbsp_get_max_tx_threshold); + +/* + * omap_mcbsp_get_max_rx_thres just return the current configured + * maximum threshold for reception + */ +u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + return mcbsp->max_rx_thres; +} +EXPORT_SYMBOL(omap_mcbsp_get_max_rx_threshold); #endif /* @@ -1005,6 +1041,86 @@ void omap_mcbsp_set_spi_mode(unsigned int id, } EXPORT_SYMBOL(omap_mcbsp_set_spi_mode); +#ifdef CONFIG_ARCH_OMAP34XX +#define max_thres(m) (mcbsp->pdata->buffer_size) +#define valid_threshold(m, val) ((val) <= max_thres(m)) +#define THRESHOLD_PROP_BUILDER(prop) \ +static ssize_t prop##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); \ + \ + return sprintf(buf, "%u\n", mcbsp->prop); \ +} \ + \ +static ssize_t prop##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t size) \ +{ \ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); \ + unsigned long val; \ + int status; \ + \ + status = strict_strtoul(buf, 0, &val); \ + if (status) \ + return status; \ + \ + if (!valid_threshold(mcbsp, val)) \ + return -EDOM; \ + \ + mcbsp->prop = val; \ + return size; \ +} \ + \ +static DEVICE_ATTR(prop, 0644, prop##_show, prop##_store); + +THRESHOLD_PROP_BUILDER(max_tx_thres); +THRESHOLD_PROP_BUILDER(max_rx_thres); + +static const struct attribute *threshold_attrs[] = { + &dev_attr_max_tx_thres.attr, + &dev_attr_max_rx_thres.attr, + NULL, +}; + +static const struct attribute_group threshold_attr_group = { + .attrs = (struct attribute **)threshold_attrs, +}; + +static inline int __devinit omap_thres_add(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &threshold_attr_group); +} + +static inline void __devexit omap_thres_remove(struct device *dev) +{ + sysfs_remove_group(&dev->kobj, &threshold_attr_group); +} + +static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) +{ + if (cpu_is_omap34xx()) { + mcbsp->max_tx_thres = max_thres(mcbsp); + mcbsp->max_rx_thres = max_thres(mcbsp); + if (omap_thres_add(mcbsp->dev)) + dev_warn(mcbsp->dev, + "Unable to create threshold controls\n"); + } else { + mcbsp->max_tx_thres = -EINVAL; + mcbsp->max_rx_thres = -EINVAL; + } +} + +static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) +{ + if (cpu_is_omap34xx()) + omap_thres_remove(mcbsp->dev); +} +#else +static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) {} +static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) {} +#endif /* CONFIG_ARCH_OMAP34XX */ + /* * McBSP1 and McBSP3 are directly mapped on 1610 and 1510. * 730 has only 2 McBSP, and both of them are MPU peripherals. 
@@ -1075,6 +1191,10 @@ static int __devinit omap_mcbsp_probe(struct platform_device *pdev) mcbsp->dev = &pdev->dev; mcbsp_ptr[id] = mcbsp; platform_set_drvdata(pdev, mcbsp); + + /* Initialize mcbsp properties for OMAP34XX if needed / applicable */ + omap34xx_device_init(mcbsp); + return 0; err_fclk: @@ -1098,6 +1218,8 @@ static int __devexit omap_mcbsp_remove(struct platform_device *pdev) mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(mcbsp->id); + omap34xx_device_exit(mcbsp); + clk_disable(mcbsp->fclk); clk_disable(mcbsp->iclk); clk_put(mcbsp->fclk); -- cgit v1.2.3 From 7e4f943b725008272d5c50e676a89d642232a4e3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 20 Aug 2009 16:18:12 +0300 Subject: OMAP3: McBSP: Lower the maximum buffersize for McBSP1, 3, 4, 5 Do not allow applications to use the full buffer found on McBSP1,3,4,5. Using the full buffer in threshold mode causes the McBSP buffer to run dry, which can be observed as the channels switching (in reality the channels are shifting). Signed-off-by: Peter Ujfalusi Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/mach-omap2/mcbsp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index f837114d1d6..7d22caf6009 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -129,7 +129,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, - .buffer_size = 0x7F, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP2_BASE, @@ -147,7 +147,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP3_IRQ_RX, .tx_irq = INT_24XX_MCBSP3_IRQ_TX, .ops = &omap2_mcbsp_ops, - .buffer_size = 0x7F, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP4_BASE, @@ -156,7 +156,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP4_IRQ_RX, .tx_irq = INT_24XX_MCBSP4_IRQ_TX, .ops = &omap2_mcbsp_ops, - .buffer_size = 0x7F, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP5_BASE, @@ -165,7 +165,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP5_IRQ_RX, .tx_irq = INT_24XX_MCBSP5_IRQ_TX, .ops = &omap2_mcbsp_ops, - .buffer_size = 0x7F, + .buffer_size = 0x6F, }, }; #define OMAP34XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap34xx_mcbsp_pdata) -- cgit v1.2.3 From 4c8200aeb04b8aeaf8c7425f49caf761e2cda95a Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:13 +0300 Subject: OMAP: McBSP: Rename thres sysfs symbols This patch renames the symbols that handle threshold sysfs properties. This way we can add more sysfs properties to them.
Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/mcbsp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 0bbc912e548..ccaa9aedb0a 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -1077,24 +1077,24 @@ static DEVICE_ATTR(prop, 0644, prop##_show, prop##_store); THRESHOLD_PROP_BUILDER(max_tx_thres); THRESHOLD_PROP_BUILDER(max_rx_thres); -static const struct attribute *threshold_attrs[] = { +static const struct attribute *additional_attrs[] = { &dev_attr_max_tx_thres.attr, &dev_attr_max_rx_thres.attr, NULL, }; -static const struct attribute_group threshold_attr_group = { - .attrs = (struct attribute **)threshold_attrs, +static const struct attribute_group additional_attr_group = { + .attrs = (struct attribute **)additional_attrs, }; -static inline int __devinit omap_thres_add(struct device *dev) +static inline int __devinit omap_additional_add(struct device *dev) { - return sysfs_create_group(&dev->kobj, &threshold_attr_group); + return sysfs_create_group(&dev->kobj, &additional_attr_group); } -static inline void __devexit omap_thres_remove(struct device *dev) +static inline void __devexit omap_additional_remove(struct device *dev) { - sysfs_remove_group(&dev->kobj, &threshold_attr_group); + sysfs_remove_group(&dev->kobj, &additional_attr_group); } static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) @@ -1102,9 +1102,9 @@ static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) if (cpu_is_omap34xx()) { mcbsp->max_tx_thres = max_thres(mcbsp); mcbsp->max_rx_thres = max_thres(mcbsp); - if (omap_thres_add(mcbsp->dev)) + if (omap_additional_add(mcbsp->dev)) dev_warn(mcbsp->dev, - "Unable to create threshold controls\n"); + "Unable to create additional controls\n"); } else { mcbsp->max_tx_thres = -EINVAL; mcbsp->max_rx_thres = -EINVAL; @@ -1114,7 +1114,7 @@ static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) { if (cpu_is_omap34xx()) - omap_thres_remove(mcbsp->dev); + omap_additional_remove(mcbsp->dev); } #else static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) {} static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) {} -- cgit v1.2.3 From 98cb20e88957faf9c99e194242caac7f55dd47e4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 20 Aug 2009 16:18:14 +0300 Subject: OMAP: McBSP: Add link DMA mode selection It adds a new sysfs file, where the user can configure the mcbsp mode to use. If the mcbsp channel is in use, it does not allow the change. Then in omap_pcm_open we can call the omap_mcbsp_get_opmode to get the mode, store it, then use it to implement the different modes.
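A hedged sketch of the omap_pcm_open usage described above (the surrounding logic is an assumption; the getter and the mode constants come from the patch below):

        int mode = omap_mcbsp_get_dma_op_mode(id);

        if (mode < 0)
                return mode;    /* -ENODEV for an invalid id */

        switch (mode) {
        case MCBSP_DMA_MODE_ELEMENT:
                /* one DMA request per element */
                break;
        case MCBSP_DMA_MODE_THRESHOLD:
                /* one DMA request when the FIFO threshold is reached */
                break;
        case MCBSP_DMA_MODE_FRAME:
                /* one DMA request per audio frame */
                break;
        }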
Signed-off-by: Peter Ujfalusi Signed-off-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/include/mach/mcbsp.h | 8 ++++ arch/arm/plat-omap/mcbsp.c | 84 +++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index 832330d7cb3..bd5b759991c 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -255,6 +255,11 @@ /********************** McBSP SYSCONFIG bit definitions ********************/ #define SOFTRST 0x0002 +/********************** McBSP DMA operating modes **************************/ +#define MCBSP_DMA_MODE_ELEMENT 0 +#define MCBSP_DMA_MODE_THRESHOLD 1 +#define MCBSP_DMA_MODE_FRAME 2 + /* we don't do multichannel for now */ struct omap_mcbsp_reg_cfg { u16 spcr2; @@ -385,6 +390,7 @@ struct omap_mcbsp { struct clk *iclk; struct clk *fclk; #ifdef CONFIG_ARCH_OMAP34XX + int dma_op_mode; u16 max_tx_thres; u16 max_rx_thres; #endif @@ -401,6 +407,7 @@ void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold); void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold); u16 omap_mcbsp_get_max_tx_threshold(unsigned int id); u16 omap_mcbsp_get_max_rx_threshold(unsigned int id); +int omap_mcbsp_get_dma_op_mode(unsigned int id); #else static inline void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) { } @@ -408,6 +415,7 @@ static inline void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) { } static inline u16 omap_mcbsp_get_max_tx_threshold(unsigned int id) { return 0; } static inline u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) { return 0; } +static inline int omap_mcbsp_get_dma_op_mode(unsigned int id) { return 0; } #endif int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index ccaa9aedb0a..9e699947a69 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -282,6 +282,29 @@ u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) return mcbsp->max_rx_thres; } EXPORT_SYMBOL(omap_mcbsp_get_max_rx_threshold); + +/* + * omap_mcbsp_get_dma_op_mode just return the current configured + * operating mode for the mcbsp channel + */ +int omap_mcbsp_get_dma_op_mode(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + int dma_op_mode; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%u)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + spin_lock_irq(&mcbsp->lock); + dma_op_mode = mcbsp->dma_op_mode; + spin_unlock_irq(&mcbsp->lock); + + return dma_op_mode; +} +EXPORT_SYMBOL(omap_mcbsp_get_dma_op_mode); #endif /* @@ -1077,9 +1100,65 @@ static DEVICE_ATTR(prop, 0644, prop##_show, prop##_store); THRESHOLD_PROP_BUILDER(max_tx_thres); THRESHOLD_PROP_BUILDER(max_rx_thres); +static ssize_t dma_op_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); + int dma_op_mode; + + spin_lock_irq(&mcbsp->lock); + dma_op_mode = mcbsp->dma_op_mode; + spin_unlock_irq(&mcbsp->lock); + + return sprintf(buf, "current mode: %d\n" + "possible mode values are:\n" + "%d - %s\n" + "%d - %s\n" + "%d - %s\n", + dma_op_mode, + MCBSP_DMA_MODE_ELEMENT, "element mode", + MCBSP_DMA_MODE_THRESHOLD, "threshold mode", + MCBSP_DMA_MODE_FRAME, "frame mode"); +} + +static ssize_t dma_op_mode_store(struct device *dev, + struct device_attribute *attr, + 
const char *buf, size_t size)
+{
+	struct omap_mcbsp *mcbsp = dev_get_drvdata(dev);
+	unsigned long val;
+	int status;
+
+	status = strict_strtoul(buf, 0, &val);
+	if (status)
+		return status;
+
+	spin_lock_irq(&mcbsp->lock);
+
+	if (!mcbsp->free) {
+		size = -EBUSY;
+		goto unlock;
+	}
+
+	if (val > MCBSP_DMA_MODE_FRAME || val < MCBSP_DMA_MODE_ELEMENT) {
+		size = -EINVAL;
+		goto unlock;
+	}
+
+	mcbsp->dma_op_mode = val;
+
+unlock:
+	spin_unlock_irq(&mcbsp->lock);
+
+	return size;
+}
+
+static DEVICE_ATTR(dma_op_mode, 0644, dma_op_mode_show, dma_op_mode_store);
+
 static const struct attribute *additional_attrs[] = {
 	&dev_attr_max_tx_thres.attr,
 	&dev_attr_max_rx_thres.attr,
+	&dev_attr_dma_op_mode.attr,
 	NULL,
 };

@@ -1099,9 +1178,14 @@ static inline void __devexit omap_additional_remove(struct device *dev)
 static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp)
 {
+	mcbsp->dma_op_mode = MCBSP_DMA_MODE_ELEMENT;
 	if (cpu_is_omap34xx()) {
 		mcbsp->max_tx_thres = max_thres(mcbsp);
 		mcbsp->max_rx_thres = max_thres(mcbsp);
+		/*
+		 * REVISIT: Set dmap_op_mode to THRESHOLD as default
+		 * for mcbsp2 instances.
+		 */
 		if (omap_additional_add(mcbsp->dev))
 			dev_warn(mcbsp->dev,
				"Unable to create additional controls\n");
-- cgit v1.2.3

From 2122fdc629f05634537e796c0630028e4db76953 Mon Sep 17 00:00:00 2001
From: Eero Nurkkala
Date: Thu, 20 Aug 2009 16:18:15 +0300
Subject: OMAP: McBSP: Wakeups utilized

This patch enables the smart idle mode while the McBSP is being utilized.
Once it is done, force idle mode is used instead. In addition, it configures
which signals will wake the McBSP up.

Signed-off-by: Eero Nurkkala
Signed-off-by: Eduardo Valentin
Acked-by: Tony Lindgren
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/include/mach/mcbsp.h | 17 ++++++++++++
 arch/arm/plat-omap/mcbsp.c | 46 +++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h
index bd5b759991c..333061d4f4a 100644
--- a/arch/arm/plat-omap/include/mach/mcbsp.h
+++ b/arch/arm/plat-omap/include/mach/mcbsp.h
@@ -138,6 +138,7 @@
 #define OMAP_MCBSP_REG_THRSH1 0x94
 #define OMAP_MCBSP_REG_IRQST 0xA0
 #define OMAP_MCBSP_REG_IRQEN 0xA4
+#define OMAP_MCBSP_REG_WAKEUPEN 0xA8
 #define OMAP_MCBSP_REG_XCCR 0xAC
 #define OMAP_MCBSP_REG_RCCR 0xB0
@@ -253,6 +254,8 @@
 #define RDISABLE 0x0001

 /********************** McBSP SYSCONFIG bit definitions ********************/
+#define SIDLEMODE(value) ((value)<<3)
+#define ENAWAKEUP 0x0004
 #define SOFTRST 0x0002

 /********************** McBSP DMA operating modes **************************/
 #define MCBSP_DMA_MODE_ELEMENT 0
 #define MCBSP_DMA_MODE_THRESHOLD 1
 #define MCBSP_DMA_MODE_FRAME 2

+/********************** McBSP WAKEUPEN bit definitions *********************/
+#define XEMPTYEOFEN 0x4000
+#define XRDYEN 0x0400
+#define XEOFEN 0x0200
+#define XFSXEN 0x0100
+#define XSYNCERREN 0x0080
+#define RRDYEN 0x0008
+#define REOFEN 0x0004
+#define RFSREN 0x0002
+#define RSYNCERREN 0x0001
+#define WAKEUPEN_ALL (XEMPTYEOFEN | XRDYEN | XEOFEN | XFSXEN | \
+		      XSYNCERREN | RRDYEN | REOFEN | RFSREN | \
+		      RSYNCERREN)
+
 /* we don't do multichannel for now */
 struct omap_mcbsp_reg_cfg {
 	u16 spcr2;
diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index 9e699947a69..136f8c5d0da 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -305,6 +305,46 @@ int omap_mcbsp_get_dma_op_mode(unsigned int id)
 	return dma_op_mode;
 }
 EXPORT_SYMBOL(omap_mcbsp_get_dma_op_mode);
+
+static inline void 
omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp) +{ + /* + * Enable wakup behavior, smart idle and all wakeups + * REVISIT: some wakeups may be unnecessary + */ + if (cpu_is_omap34xx()) { + u16 syscon; + + syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON); + syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03)); + syscon |= (ENAWAKEUP | SIDLEMODE(0x02)); + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + + OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, WAKEUPEN_ALL); + } +} + +static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp) +{ + /* + * Disable wakup behavior, smart idle and all wakeups + */ + if (cpu_is_omap34xx()) { + u16 syscon; + u16 wakeupen; + + syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON); + syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03)); + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + + wakeupen = OMAP_MCBSP_READ(mcbsp->io_base, WAKEUPEN); + wakeupen &= ~WAKEUPEN_ALL; + OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, wakeupen); + } +} +#else +static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp) {} +static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp) {} #endif /* @@ -366,6 +406,9 @@ int omap_mcbsp_request(unsigned int id) clk_enable(mcbsp->iclk); clk_enable(mcbsp->fclk); + /* Do procedure specific to omap34xx arch, if applicable */ + omap34xx_mcbsp_request(mcbsp); + /* * Make sure that transmitter, receiver and sample-rate generator are * not running before activating IRQs. @@ -414,6 +457,9 @@ void omap_mcbsp_free(unsigned int id) if (mcbsp->pdata && mcbsp->pdata->ops && mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(id); + /* Do procedure specific to omap34xx arch, if applicable */ + omap34xx_mcbsp_free(mcbsp); + clk_disable(mcbsp->fclk); clk_disable(mcbsp->iclk); -- cgit v1.2.3 From d9a9b3f5f7d4736cfe6fae95b9d63b07faeb702e Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Thu, 20 Aug 2009 16:18:16 +0300 Subject: OMAP: McBSP: Change wakeup signals Configure only XRDYEN and RRDYEN wakeup signals in order to get better power consumption. 
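Numerically, the change shrinks the wakeup mask considerably. A quick
standalone check, using the bit values from the WAKEUPEN definitions added
earlier in the series:

	#include <stdio.h>

	int main(void)
	{
		/* former WAKEUPEN_ALL: every event source enabled */
		unsigned int all = 0x4000 | 0x0400 | 0x0200 | 0x0100 | 0x0080 |
				   0x0008 | 0x0004 | 0x0002 | 0x0001;

		printf("old WAKEUPEN mask: 0x%04x\n", all);		/* 0x478f */
		printf("new WAKEUPEN mask: 0x%04x\n", 0x0400 | 0x0008);	/* XRDYEN | RRDYEN */
		return 0;
	}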
Signed-off-by: Eduardo Valentin
Acked-by: Tony Lindgren
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/include/mach/mcbsp.h | 3 ---
 arch/arm/plat-omap/mcbsp.c | 7 ++-----
 2 files changed, 2 insertions(+), 8 deletions(-)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h
index 333061d4f4a..fe10ae8b027 100644
--- a/arch/arm/plat-omap/include/mach/mcbsp.h
+++ b/arch/arm/plat-omap/include/mach/mcbsp.h
@@ -273,9 +273,6 @@
 #define REOFEN 0x0004
 #define RFSREN 0x0002
 #define RSYNCERREN 0x0001
-#define WAKEUPEN_ALL (XEMPTYEOFEN | XRDYEN | XEOFEN | XFSXEN | \
-		      XSYNCERREN | RRDYEN | REOFEN | RFSREN | \
-		      RSYNCERREN)

 /* we don't do multichannel for now */
 struct omap_mcbsp_reg_cfg {
diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index 136f8c5d0da..86bfad88abd 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -320,7 +320,7 @@ static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp)
 		syscon |= (ENAWAKEUP | SIDLEMODE(0x02));
 		OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon);

-		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, WAKEUPEN_ALL);
+		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, XRDYEN | RRDYEN);
 	}
 }

@@ -331,15 +331,12 @@ static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp)
 	 */
 	if (cpu_is_omap34xx()) {
 		u16 syscon;
-		u16 wakeupen;

 		syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON);
 		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03));
 		OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon);

-		wakeupen = OMAP_MCBSP_READ(mcbsp->io_base, WAKEUPEN);
-		wakeupen &= ~WAKEUPEN_ALL;
-		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, wakeupen);
+		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, 0);
 	}
 }
 #else
-- cgit v1.2.3

From 2ba93f8fa77c0140de163e8a31bb9c09b5ded74c Mon Sep 17 00:00:00 2001
From: Eero Nurkkala
Date: Thu, 20 Aug 2009 16:18:17 +0300
Subject: OMAP: McBSP: Retain McBSP FCLK clockactivity

FCLK may get autogated, which prevents the McBSP from working properly.
Bit 9 must be set to keep the McBSP FCLK maintained.
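That "bit 9" follows directly from the macro added below: CLOCKACTIVITY(value)
is ((value) << 8), so writing 0x02 sets bit 9. A one-line standalone check:

	#include <stdio.h>

	#define CLOCKACTIVITY(value)	((value) << 8)

	int main(void)
	{
		/* prints 0x0200: bit 9 set, FCLK maintained */
		printf("CLOCKACTIVITY(0x02) = 0x%04x\n", CLOCKACTIVITY(0x02));
		return 0;
	}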
Signed-off-by: Eero Nurkkala
Signed-off-by: Eduardo Valentin
Acked-by: Jarkko Nikula
Acked-by: Tony Lindgren
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/include/mach/mcbsp.h | 1 +
 arch/arm/plat-omap/mcbsp.c | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h
index fe10ae8b027..70e950e295e 100644
--- a/arch/arm/plat-omap/include/mach/mcbsp.h
+++ b/arch/arm/plat-omap/include/mach/mcbsp.h
@@ -254,6 +254,7 @@
 #define RDISABLE 0x0001

 /********************** McBSP SYSCONFIG bit definitions ********************/
+#define CLOCKACTIVITY(value) ((value)<<8)
 #define SIDLEMODE(value) ((value)<<3)
 #define ENAWAKEUP 0x0004
 #define SOFTRST 0x0002
diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index 86bfad88abd..2c274e6f0de 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -316,8 +316,8 @@ static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp)
 		u16 syscon;

 		syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON);
-		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03));
-		syscon |= (ENAWAKEUP | SIDLEMODE(0x02));
+		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03));
+		syscon |= (ENAWAKEUP | SIDLEMODE(0x02) | CLOCKACTIVITY(0x02));
 		OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon);

 		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, XRDYEN | RRDYEN);
@@ -333,7 +333,7 @@ static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp)
 		u16 syscon;

 		syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON);
-		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03));
+		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03));
 		OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon);

 		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, 0);
-- cgit v1.2.3

From d99a7454e57d3dc9d04d6a77292a056f430a9ece Mon Sep 17 00:00:00 2001
From: Eduardo Valentin
Date: Thu, 20 Aug 2009 16:18:18 +0300
Subject: OMAP: McBSP: Configure NO IDLE mode for DMA modes other than threshold

Use the DMA mode property to configure NO IDLE or SMART IDLE mode for the
McBSPs.

Signed-off-by: Eduardo Valentin
Acked-by: Tony Lindgren
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/mcbsp.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index 2c274e6f0de..85176a5f414 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -317,7 +317,15 @@ static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp)
 		syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON);
 		syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03));
-		syscon |= (ENAWAKEUP | SIDLEMODE(0x02) | CLOCKACTIVITY(0x02));
+
+		spin_lock_irq(&mcbsp->lock);
+		if (mcbsp->dma_op_mode == MCBSP_DMA_MODE_THRESHOLD)
+			syscon |= SIDLEMODE(0x02);
+		else
+			syscon |= SIDLEMODE(0x01);
+		spin_unlock_irq(&mcbsp->lock);
+
+		syscon |= (ENAWAKEUP | CLOCKACTIVITY(0x02));
 		OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon);

 		OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, XRDYEN | RRDYEN);
-- cgit v1.2.3

From fa3935ba34667ffd35fbb33958cd1d67035fdf82 Mon Sep 17 00:00:00 2001
From: Eero Nurkkala
Date: Thu, 20 Aug 2009 16:18:19 +0300
Subject: OMAP: McBSP: Do not enable wakeups for no-idle mode

When no-idle mode is taken, wakeups need not be enabled. Moreover, the
CLOCKACTIVITY bits are also unnecessary in this mode.
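Condensed, the idle policy these two patches converge on looks like this.
This is a sketch restating the diff below, not kernel source; the
SIDLEMODE/CLOCKACTIVITY/ENAWAKEUP macros and MCBSP_DMA_MODE_THRESHOLD are
the ones defined earlier in the series:

	/* returns the SYSCONFIG bits to OR in for a given DMA mode */
	static u16 pick_idle_bits(int dma_op_mode)
	{
		if (dma_op_mode == MCBSP_DMA_MODE_THRESHOLD)
			/* smart idle: wakeups and CLOCKACTIVITY maintained */
			return ENAWAKEUP | SIDLEMODE(0x02) | CLOCKACTIVITY(0x02);
		/* no idle: no wakeups, no CLOCKACTIVITY bits */
		return SIDLEMODE(0x01);
	}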
Signed-off-by: Eero Nurkkala Acked-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/mcbsp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 85176a5f414..56a56887ded 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -319,16 +319,17 @@ static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp) syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03)); spin_lock_irq(&mcbsp->lock); - if (mcbsp->dma_op_mode == MCBSP_DMA_MODE_THRESHOLD) - syscon |= SIDLEMODE(0x02); - else + if (mcbsp->dma_op_mode == MCBSP_DMA_MODE_THRESHOLD) { + syscon |= (ENAWAKEUP | SIDLEMODE(0x02) | + CLOCKACTIVITY(0x02)); + OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, + XRDYEN | RRDYEN); + } else { syscon |= SIDLEMODE(0x01); + } spin_unlock_irq(&mcbsp->lock); - syscon |= (ENAWAKEUP | CLOCKACTIVITY(0x02)); OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); - - OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, XRDYEN | RRDYEN); } } -- cgit v1.2.3 From 72cc6d715d5b018e2cff4adb68966855850d4e77 Mon Sep 17 00:00:00 2001 From: Eero Nurkkala Date: Thu, 20 Aug 2009 16:18:20 +0300 Subject: OMAP: McBSP: Let element DMA mode hit retention also The device no longer hits retention if element DMA mode is taken for at least the duration of the serial console timeout. Force element DMA mode to shut down through smartidle. Signed-off-by: Eero Nurkkala Acked-by: Eduardo Valentin Acked-by: Tony Lindgren Signed-off-by: Mark Brown --- arch/arm/plat-omap/mcbsp.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 56a56887ded..b63a7209b41 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -343,6 +343,15 @@ static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp) syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON); syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03)); + /* + * HW bug workaround - If no_idle mode is taken, we need to + * go to smart_idle before going to always_idle, or the + * device will not hit retention anymore. + */ + syscon |= SIDLEMODE(0x02); + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + + syscon &= ~(SIDLEMODE(0x03)); OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, 0); -- cgit v1.2.3 From 14412acde5b57450b8afb3d4b03132419b6abebf Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 20 Aug 2009 22:50:40 +0100 Subject: ASoC: S3C24XX: Add audio core and tlv320aic23 for Simtec boards Add core support for the range of S3C24XX Simtec boards with TLV320AIC23 CODECs on them. Since there are also boards with similar IIS routing the AMP and the configuration code is placed in a core file for re-use with other CODEC bindings. 
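A board file would then fill in the platform data declared in the header
below along these lines; the field values and the codec name string are
hypothetical, for illustration only:

	static struct s3c24xx_audio_simtec_pdata example_audio_pdata = {
		.use_mpllin = 1,
		.have_mic = 1,
		.have_lout = 1,
		.amp_gpio = -1,		/* no AMP enable GPIO on this board */
		.amp_gain = { -1, -1 },	/* no gain control GPIOs either */
	};

	static int __init example_audio_init(void)
	{
		return simtec_audio_add("tlv320aic23", &example_audio_pdata);
	}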
Signed-off-by: Ben Dooks Signed-off-by: Mark Brown --- arch/arm/plat-s3c/include/plat/audio-simtec.h | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 arch/arm/plat-s3c/include/plat/audio-simtec.h (limited to 'arch') diff --git a/arch/arm/plat-s3c/include/plat/audio-simtec.h b/arch/arm/plat-s3c/include/plat/audio-simtec.h new file mode 100644 index 00000000000..0f440b9168d --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/audio-simtec.h @@ -0,0 +1,37 @@ +/* arch/arm/plat-s3c/include/plat/audio-simtec.h + * + * Copyright 2008 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Simtec Audio support. +*/ + +/** + * struct s3c24xx_audio_simtec_pdata - platform data for simtec audio + * @use_mpllin: Select codec clock from MPLLin + * @output_cdclk: Need to output CDCLK to the codec + * @have_mic: Set if we have a MIC socket + * @have_lout: Set if we have a LineOut socket + * @amp_gpio: GPIO pin to enable the AMP + * @amp_gain: Option GPIO to control AMP gain + */ +struct s3c24xx_audio_simtec_pdata { + unsigned int use_mpllin:1; + unsigned int output_cdclk:1; + + unsigned int have_mic:1; + unsigned int have_lout:1; + + int amp_gpio; + int amp_gain[2]; + + void (*startup)(void); +}; + +extern int simtec_audio_add(const char *codec_name, + struct s3c24xx_audio_simtec_pdata *pdata); -- cgit v1.2.3 From 8b5a10fc6fd02289ea03480f93382b1a99006142 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:40:48 +0100 Subject: x86: properly annotate alternatives.c Some of the NOPs tables aren't used on 64-bits, quite some code and data is needed post-init for module loading only, and a couple of functions aren't used outside that file (i.e. can be static, and don't need to be exported). The change to __INITDATA/__INITRODATA is needed to avoid an assembler warning. Signed-off-by: Jan Beulich LKML-Reference: <4A8BC8A00200007800010823@vpn.id2.novell.com> Acked-by: Sam Ravnborg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 7 ----- arch/x86/kernel/alternative.c | 56 ++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1a37bcdc860..c240efc74e0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ -const unsigned char *const *find_nop_table(void); - /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ \ @@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -extern void add_nops(void *insns, unsigned int len); - /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len); * Intel's errata. * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. - * The _early version expects the memory to already be RW. 
*/ - extern void *text_poke(void *addr, const void *opcode, size_t len); -extern void *text_poke_early(void *addr, const void *opcode, size_t len); #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f5765870257..486935143e0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); #define smp_alt_once 1 #endif -static int debug_alternative; +static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { @@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) __setup("noreplace-smp", setup_noreplace_smp); #ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; +static int __initdata_or_module noreplace_paravirt = 0; static int __init setup_noreplace_paravirt(char *str) { @@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); #define DPRINTK(fmt, args...) if (debug_alternative) \ printk(KERN_DEBUG fmt, args) -#ifdef GENERIC_NOP1 +#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. */ -asm("\t.section .rodata, \"a\"\nintelnops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8 "\t.previous"); extern const unsigned char intelnops[]; -static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.section .rodata, \"a\"\nk8nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8 "\t.previous"); extern const unsigned char k8nops[]; -static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { }; #endif -#ifdef K7_NOP1 -asm("\t.section .rodata, \"a\"\nk7nops: " +#if defined(K7_NOP1) && !defined(CONFIG_X86_64) +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8 "\t.previous"); extern const unsigned char k7nops[]; -static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { #endif #ifdef P6_NOP1 -asm("\t.section .rodata, \"a\"\np6nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 P6_NOP7 P6_NOP8 "\t.previous"); extern const unsigned char p6nops[]; -static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +p6_nops[ASM_NOP_MAX+1] = { NULL, p6nops, p6nops + 1, @@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -const unsigned char *const *find_nop_table(void) +static const unsigned char *const 
*__init_or_module find_nop_table(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_has(X86_FEATURE_NOPL)) @@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) #else /* CONFIG_X86_64 */ -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_has(X86_FEATURE_K8)) return k8_nops; @@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -void add_nops(void *insns, unsigned int len) +static void __init_or_module add_nops(void *insns, unsigned int len) { const unsigned char *const *noptable = find_nop_table(); @@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) len -= noplen; } } -EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; +static void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; APs have less capabilities than the boot processor are not handled. Tough. Make sure you disable such features by hand. */ -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; @@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) +void __init_or_module alternatives_smp_module_add(struct module *mod, + char *name, + void *locks, void *locks_end, + void *text, void *text_end) { struct smp_alt_module *smp; @@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, mutex_unlock(&smp_alt); } -void alternatives_smp_module_del(struct module *mod) +void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; @@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) #endif #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) +void __init_or_module apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) { struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; @@ -485,7 +492,8 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *text_poke_early(void *addr, const void *opcode, size_t len) +static void *__init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { unsigned long flags; local_irq_save(flags); -- cgit v1.2.3 From a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:51 -0700 Subject: rcu: Simplify rcu_pending()/rcu_check_callbacks() API All calls from outside RCU are of the form: if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user); This is silly, instead we put a call to rcu_pending() in rcu_check_callbacks(), and then make the outside calls be to rcu_check_callbacks(). 
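In code, the transformation at each call site is simply:

	/* before (every caller): */
	if (rcu_pending(cpu))
		rcu_check_callbacks(cpu, user);

	/* after -- the rcu_pending() test now lives inside: */
	rcu_check_callbacks(cpu, user);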
This cuts down on the code a bit and also gives the compiler a better chance of optimizing. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461311-git-send-email-> Signed-off-by: Ingo Molnar --- arch/ia64/xen/time.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb833269017..dbeadb9c8e2 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -133,8 +133,7 @@ consider_steal_time(unsigned long new_itm) account_idle_ticks(blocked); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From 366d19e181be873c70f4aafca3931d77d781ccd7 Mon Sep 17 00:00:00 2001 From: Tobias Doerffel Date: Fri, 21 Aug 2009 23:06:23 +0200 Subject: x86: add specific support for Intel Atom architecture Add another option when selecting CPU family so the kernel can be optimized for Intel Atom CPUs. If GCC supports tuning options for Intel Atom they will be used. Signed-off-by: Tobias Doerffel Signed-off-by: H. Peter Anvin LKML-Reference: <1251018457-19157-1-git-send-email-tobias.doerffel@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 19 ++++++++++++++----- arch/x86/Makefile | 2 ++ arch/x86/Makefile_32.cpu | 2 ++ arch/x86/include/asm/module.h | 2 ++ 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8130334329c..527519b8a9f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -262,6 +262,15 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) +config MATOM + bool "Intel Atom" + ---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. 
+ config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -295,7 +304,7 @@ config X86_CPU config X86_L1_CACHE_BYTES int default "128" if MPSC - default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 + default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 config X86_INTERNODE_CACHE_BYTES int @@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y @@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM config X86_USE_3DNOW def_bool y @@ -387,7 +396,7 @@ config X86_P6_NOP config X86_TSC def_bool y - depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 + depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 config X86_CMPXCHG64 def_bool y @@ -397,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659c41b..8a4c24c96d0 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -55,6 +55,8 @@ else cflags-$(CONFIG_MCORE2) += \ $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 80177ec052f..30e9a264f69 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -33,6 +33,8 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-f cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 47d62743c4d..e959c4afab5 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -28,6 +28,8 @@ struct mod_arch_specific {}; #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 #define MODULE_PROC_FAMILY "686 " #elif defined CONFIG_MPENTIUMII -- cgit v1.2.3 From 10f02d1168585edf66229bb2ec90a42f32667a78 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 23 Aug 2009 23:17:27 +0400 Subject: x86: uv: Clean up uv_ptc_init(), use proc_create() create_proc_entry() is getting duhprecated. Signed-off-by: Alexey Dobriyan Cc: cpw@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 77b9689f8ed..503c1f2e883 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -640,13 +640,13 @@ static int __init uv_ptc_init(void) if (!is_uv_system()) return 0; - proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); + proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, + &proc_uv_ptc_operations); if (!proc_uv_ptc) { printk(KERN_ERR "unable to create %s proc entry\n", UV_PTC_BASENAME); return -EINVAL; } - proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; return 0; } -- cgit v1.2.3 From 9f0f4ae570a148c76be6e86c959c8d4ed912fb1f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sun, 23 Aug 2009 17:56:12 +0200 Subject: ARM: OMAP: DMA: Add support for DMA channel self linking on OMAP1510 Implement DMA channel self linking on OMAP1510 using AUTO_INIT and REPEAT flags of the DMA CCR register. Created against linux-2.6.31-rc5. Tested on Amstrad Delta. 
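The (3 << 8) manipulation in the diff that follows sets or clears two
adjacent CCR bits at once -- per the changelog, the AUTO_INIT and REPEAT
flags. As plain bit arithmetic, in a standalone check:

	#include <stdio.h>

	int main(void)
	{
		unsigned int ccr = 0;

		ccr |= 3 << 8;		/* link channel to itself: AUTO_INIT | REPEAT */
		printf("linked:   ccr = 0x%04x\n", ccr);	/* 0x0300 */
		ccr &= ~(3 << 8);	/* unlink: clear both flags again */
		printf("unlinked: ccr = 0x%04x\n", ccr);	/* 0x0000 */
		return 0;
	}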
Signed-off-by: Janusz Krzysztofik
Acked-by: Tony Lindgren
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/dma.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index def14ec265b..da4cc5288db 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -1125,6 +1125,11 @@ int omap_dma_running(void)
 void omap_dma_link_lch(int lch_head, int lch_queue)
 {
 	if (omap_dma_in_1510_mode()) {
+		if (lch_head == lch_queue) {
+			dma_write(dma_read(CCR(lch_head)) | (3 << 8),
+				  CCR(lch_head));
+			return;
+		}
 		printk(KERN_ERR "DMA linking is not supported in 1510 mode\n");
 		BUG();
 		return;
@@ -1147,6 +1152,11 @@ EXPORT_SYMBOL(omap_dma_link_lch);
 void omap_dma_unlink_lch(int lch_head, int lch_queue)
 {
 	if (omap_dma_in_1510_mode()) {
+		if (lch_head == lch_queue) {
+			dma_write(dma_read(CCR(lch_head)) & ~(3 << 8),
+				  CCR(lch_head));
+			return;
+		}
 		printk(KERN_ERR "DMA linking is not supported in 1510 mode\n");
 		BUG();
 		return;
-- cgit v1.2.3

From 9b30050908fad96968497e73b88626056ea33c96 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula
Date: Mon, 24 Aug 2009 17:45:50 +0300
Subject: OMAP: McBSP: Use textual values in DMA operating mode sysfs files

Use descriptive textual values rather than numerical ones when showing and
storing the McBSP DMA operating mode. The show function uses a syntax
similar to, e.g., the LED triggers: all values accepted by the store
function are printed, with the current value surrounded by square brackets.

Signed-off-by: Jarkko Nikula
Cc: Peter Ujfalusi
Acked-by: Eduardo Valentin
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/mcbsp.c | 48 ++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 23 deletions(-)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index b63a7209b41..ee60ab68251 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -1161,25 +1161,31 @@ static DEVICE_ATTR(prop, 0644, prop##_show, prop##_store);
 THRESHOLD_PROP_BUILDER(max_tx_thres);
 THRESHOLD_PROP_BUILDER(max_rx_thres);

+static const char *dma_op_modes[] = {
+	"element", "threshold", "frame",
+};
+
 static ssize_t dma_op_mode_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct omap_mcbsp *mcbsp = dev_get_drvdata(dev);
-	int dma_op_mode;
+	int dma_op_mode, i = 0;
+	ssize_t len = 0;
+	const char * const *s;

 	spin_lock_irq(&mcbsp->lock);
 	dma_op_mode = mcbsp->dma_op_mode;
 	spin_unlock_irq(&mcbsp->lock);

-	return sprintf(buf, "current mode: %d\n"
-			"possible mode values are:\n"
-			"%d - %s\n"
-			"%d - %s\n"
-			"%d - %s\n",
-			dma_op_mode,
-			MCBSP_DMA_MODE_ELEMENT, "element mode",
-			MCBSP_DMA_MODE_THRESHOLD, "threshold mode",
-			MCBSP_DMA_MODE_FRAME, "frame mode");
+	for (s = &dma_op_modes[i]; i < ARRAY_SIZE(dma_op_modes); s++, i++) {
+		if (dma_op_mode == i)
+			len += sprintf(buf + len, "[%s] ", *s);
+		else
+			len += sprintf(buf + len, "%s ", *s);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
 }

 static ssize_t dma_op_mode_store(struct device *dev,
@@ -1187,26 +1193,22 @@ static ssize_t dma_op_mode_store(struct device *dev,
 		const char *buf, size_t size)
 {
 	struct omap_mcbsp *mcbsp = dev_get_drvdata(dev);
-	unsigned long val;
-	int status;
+	const char * const *s;
+	int i = 0;

-	status = strict_strtoul(buf, 0, &val);
-	if (status)
-		return status;
+	for (s = &dma_op_modes[i]; i < ARRAY_SIZE(dma_op_modes); s++, i++)
+		if (sysfs_streq(buf, *s))
+			break;

-	spin_lock_irq(&mcbsp->lock);
+	if (i == ARRAY_SIZE(dma_op_modes))
+		return -EINVAL;
+	spin_lock_irq(&mcbsp->lock);
 	if (!mcbsp->free) {
 		size = -EBUSY;
 		goto unlock;
 	}
-
-	if (val > MCBSP_DMA_MODE_FRAME || val < MCBSP_DMA_MODE_ELEMENT) {
-		size = -EINVAL;
-		goto unlock;
-	}
-
-	mcbsp->dma_op_mode = val;
+	mcbsp->dma_op_mode = i;

 unlock:
 	spin_unlock_irq(&mcbsp->lock);
-- cgit v1.2.3

From d09a2afc9359407114b7062519101f1ee2d05388 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula
Date: Sun, 23 Aug 2009 12:24:27 +0300
Subject: ARM: OMAP: McBSP: Merge two functions into omap_mcbsp_start/_stop

The functionality of omap_mcbsp_xmit_enable and omap_mcbsp_recv_enable can
be merged into omap_mcbsp_start and omap_mcbsp_stop, since the API of
omap_mcbsp_start and omap_mcbsp_stop was changed recently to allow starting
and stopping the transmitter and receiver individually. This cleans up the
code in arch/arm/plat-omap/mcbsp.c and in sound/soc/omap/omap-mcbsp.c,
which was the only user of the removed functions.

Signed-off-by: Jarkko Nikula
Acked-by: Eero Nurkkala
Cc: Peter Ujfalusi
Signed-off-by: Mark Brown
---
 arch/arm/plat-omap/include/mach/mcbsp.h | 2 -
 arch/arm/plat-omap/mcbsp.c | 84 +++++++++++----------------
 2 files changed, 28 insertions(+), 58 deletions(-)
(limited to 'arch')

diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h
index 70e950e295e..63a3f254af7 100644
--- a/arch/arm/plat-omap/include/mach/mcbsp.h
+++ b/arch/arm/plat-omap/include/mach/mcbsp.h
@@ -436,8 +436,6 @@ int omap_mcbsp_request(unsigned int id);
 void omap_mcbsp_free(unsigned int id);
 void omap_mcbsp_start(unsigned int id, int tx, int rx);
 void omap_mcbsp_stop(unsigned int id, int tx, int rx);
-void omap_mcbsp_xmit_enable(unsigned int id, u8 enable);
-void omap_mcbsp_recv_enable(unsigned int id, u8 enable);
 void omap_mcbsp_xmit_word(unsigned int id, u32 word);
 u32 omap_mcbsp_recv_word(unsigned int id);

diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index ee60ab68251..8dc7927906f 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -529,11 +529,13 @@ void omap_mcbsp_start(unsigned int id, int tx, int rx)
 	}

 	/* Enable transmitter and receiver */
+	tx &= 1;
 	w = OMAP_MCBSP_READ(io_base, SPCR2);
-	OMAP_MCBSP_WRITE(io_base, SPCR2, w | (tx & 1));
+	OMAP_MCBSP_WRITE(io_base, SPCR2, w | tx);

+	rx &= 1;
 	w = OMAP_MCBSP_READ(io_base, SPCR1);
-	OMAP_MCBSP_WRITE(io_base, SPCR1, w | (rx & 1));
+	OMAP_MCBSP_WRITE(io_base, SPCR1, w | rx);

 	/*
 	 * Worst case: CLKSRG*2 = 8000khz: (1/8000) * 2 * 2 usec
@@ -549,6 +551,16 @@
 		OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 7));
 	}

+	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
+		/* Release the transmitter and receiver */
+		w = OMAP_MCBSP_READ(io_base, XCCR);
+		w &= ~(tx ? XDISABLE : 0);
+		OMAP_MCBSP_WRITE(io_base, XCCR, w);
+		w = OMAP_MCBSP_READ(io_base, RCCR);
+		w &= ~(rx ? RDISABLE : 0);
+		OMAP_MCBSP_WRITE(io_base, RCCR, w);
+	}
+
 	/* Dump McBSP Regs */
 	omap_mcbsp_dump_reg(id);
 }
@@ -570,12 +582,24 @@ void omap_mcbsp_stop(unsigned int id, int tx, int rx)
 	io_base = mcbsp->io_base;

 	/* Reset transmitter */
+	tx &= 1;
+	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
+		w = OMAP_MCBSP_READ(io_base, XCCR);
+		w |= (tx ? XDISABLE : 0);
+		OMAP_MCBSP_WRITE(io_base, XCCR, w);
+	}
 	w = OMAP_MCBSP_READ(io_base, SPCR2);
-	OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(tx & 1));
+	OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~tx);

 	/* Reset receiver */
+	rx &= 1;
+	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
+		w = OMAP_MCBSP_READ(io_base, RCCR);
+		w |= (tx ? 
RDISABLE : 0); + OMAP_MCBSP_WRITE(io_base, RCCR, w); + } w = OMAP_MCBSP_READ(io_base, SPCR1); - OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~(rx & 1)); + OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~rx); idle = !((OMAP_MCBSP_READ(io_base, SPCR2) | OMAP_MCBSP_READ(io_base, SPCR1)) & 1); @@ -588,58 +612,6 @@ void omap_mcbsp_stop(unsigned int id, int tx, int rx) } EXPORT_SYMBOL(omap_mcbsp_stop); -void omap_mcbsp_xmit_enable(unsigned int id, u8 enable) -{ - struct omap_mcbsp *mcbsp; - void __iomem *io_base; - u16 w; - - if (!(cpu_is_omap2430() || cpu_is_omap34xx())) - return; - - if (!omap_mcbsp_check_valid_id(id)) { - printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); - return; - } - - mcbsp = id_to_mcbsp_ptr(id); - io_base = mcbsp->io_base; - - w = OMAP_MCBSP_READ(io_base, XCCR); - - if (enable) - OMAP_MCBSP_WRITE(io_base, XCCR, w & ~(XDISABLE)); - else - OMAP_MCBSP_WRITE(io_base, XCCR, w | XDISABLE); -} -EXPORT_SYMBOL(omap_mcbsp_xmit_enable); - -void omap_mcbsp_recv_enable(unsigned int id, u8 enable) -{ - struct omap_mcbsp *mcbsp; - void __iomem *io_base; - u16 w; - - if (!(cpu_is_omap2430() || cpu_is_omap34xx())) - return; - - if (!omap_mcbsp_check_valid_id(id)) { - printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); - return; - } - - mcbsp = id_to_mcbsp_ptr(id); - io_base = mcbsp->io_base; - - w = OMAP_MCBSP_READ(io_base, RCCR); - - if (enable) - OMAP_MCBSP_WRITE(io_base, RCCR, w & ~(RDISABLE)); - else - OMAP_MCBSP_WRITE(io_base, RCCR, w | RDISABLE); -} -EXPORT_SYMBOL(omap_mcbsp_recv_enable); - /* polled mcbsp i/o operations */ int omap_mcbsp_pollwrite(unsigned int id, u16 buf) { -- cgit v1.2.3 From 005155b1f626d2b2d7932e4afdf4fead168c6888 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 25 Aug 2009 15:35:12 +0200 Subject: x86: Fix x86_model test in es7000_apic_is_cluster() For the x86_model to be greater than 6 or less than 12 is logically always true. Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 69328ac8de9..420f95da7bf 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void) { /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) + (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) return 1; return 0; -- cgit v1.2.3 From 667000011927b4fcc359beac4a2447889db6d349 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:11 -0700 Subject: tracing: Rename FTRACE_SYSCALLS for tracepoints s/HAVE_FTRACE_SYSCALLS/HAVE_SYSCALL_TRACEPOINTS/g s/TIF_SYSCALL_FTRACE/TIF_SYSCALL_TRACEPOINT/g The syscall enter/exit tracing is no longer specific to just ftrace, so they now have names that reflect their tie to tracepoints instead. 
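The flag itself is ordinary bitmask plumbing; on x86 the rename keeps bit 28,
as in this sketch of the relationship the diffs below establish:

	#define TIF_SYSCALL_TRACEPOINT	28	/* bit number (x86 value) */
	#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
	/* == 0x10000000, the bit that syscall_trace_enter() tests via
	 * test_thread_flag(TIF_SYSCALL_TRACEPOINT) */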
Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-2-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/Kconfig | 2 +- arch/s390/defconfig | 2 +- arch/s390/include/asm/thread_info.h | 4 ++-- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/entry64.S | 2 +- arch/s390/kernel/ptrace.c | 4 ++-- arch/x86/Kconfig | 2 +- arch/x86/configs/i386_defconfig | 2 +- arch/x86/configs/x86_64_defconfig | 2 +- arch/x86/include/asm/thread_info.h | 13 +++++++------ arch/x86/kernel/ptrace.c | 4 ++-- 11 files changed, 20 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2ae5d72f47e..7238ef4c7a6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -84,7 +84,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FTRACE_SYSCALLS + select HAVE_SYSCALL_TRACEPOINTS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_GRAPH_TRACER select HAVE_DEFAULT_NO_SPIN_MUTEXES diff --git a/arch/s390/defconfig b/arch/s390/defconfig index fcba206529f..4e91a2573cc 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -900,7 +900,7 @@ CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_TRACING_SUPPORT=y CONFIG_FTRACE=y # CONFIG_FUNCTION_TRACER is not set diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ba1cab9fc1f..07eb61b2fb3 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -92,7 +92,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ -#define TIF_SYSCALL_FTRACE 11 /* ftrace syscall instrumentation */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -111,7 +111,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1<>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f6618e9e15e..3ceb53c9c49 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -57,7 +57,7 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) #define BASED(name) name-system_call(%r13) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c5e87d891ca..9d3dcfa79ea 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -664,7 +664,7 @@ asmlinkage long 
do_syscall_trace_enter(struct pt_regs *regs) ret = -1; } - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_syscall_enter(regs, regs->gprs[2]); if (unlikely(current->audit_context)) @@ -682,7 +682,7 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_syscall_exit(regs, regs->gprs[2]); if (test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8..d59cbf758f3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -37,7 +37,7 @@ config X86 select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE - select HAVE_FTRACE_SYSCALLS + select HAVE_SYSCALL_TRACEPOINTS select HAVE_KVM select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edb992ebef9..d28fad19654 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2355,7 +2355,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cee1dd2e69b..6c86acd847a 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2329,7 +2329,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fad7d40b75f..6f7786aea4f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -95,7 +95,7 @@ struct thread_info { #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ +#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -118,17 +118,17 @@ struct thread_info { #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) -#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \ - _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_FTRACE) + _TIF_SYSCALL_TRACEPOINT) /* work to do on interrupt/exception return */ #define 
_TIF_WORK_MASK \ @@ -137,7 +137,8 @@ struct thread_info { _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) /* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE) +#define _TIF_ALLWORK_MASK \ + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 34dd6f15185..a909afef44f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1500,7 +1500,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) tracehook_report_syscall_entry(regs)) ret = -1L; - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_syscall_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { @@ -1526,7 +1526,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_syscall_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) -- cgit v1.2.3 From 97419875865859fd2403e66266c02ce028e2f5ab Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:13 -0700 Subject: tracing: Move tracepoint callbacks from declaration to definition It's not strictly correct for the tracepoint reg/unreg callbacks to occur when a client is hooking up, because the actual tracepoint may not be present yet. This happens to be fine for syscall, since that's in the core kernel, but it would cause problems for tracepoints defined in a module that hasn't been loaded yet. It also means the reg/unreg has to be EXPORTed for any modules to use the tracepoint (as in SystemTap). This patch removes DECLARE_TRACE_WITH_CALLBACK, and instead introduces DEFINE_TRACE_FN which stores the callbacks in struct tracepoint. The callbacks are used now when the active state of the tracepoint changes in set_tracepoint & disable_tracepoint. This also introduces TRACE_EVENT_FN, so ftrace events can also provide registration callbacks if needed. 
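The resulting pattern, visible in the ptrace diffs below, keeps the
callbacks with the tracepoint definition (a sketch of the shape only):

	/* callbacks now travel with the tracepoint definition */
	DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc);
	DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc);
	/* set_tracepoint()/disable_tracepoint() invoke the reg/unreg
	 * callbacks when the tracepoint's active state changes */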
Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-4-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9d3dcfa79ea..c05b44b80c2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,8 +51,8 @@ #include "compat_ptrace.h" #endif -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); enum s390_regset { REGSET_GENERAL, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a909afef44f..31e9b97ec4d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,8 +37,8 @@ #include -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); #include "tls.h" -- cgit v1.2.3 From 1c569f0264ea629c10bbab471dd0626ce4d3f19f Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:14 -0700 Subject: tracing: Create generic syscall TRACE_EVENTs This converts the syscall_enter/exit tracepoints into TRACE_EVENTs, so you can have generic ftrace events that capture all system calls with arguments and return values. These generic events are also renamed to sys_enter/exit, so they're more closely aligned to the specific sys_enter_foo events. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-5-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ptrace.c | 8 ++++---- arch/x86/kernel/ptrace.c | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c05b44b80c2..f3ddd7ac06c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,8 +51,8 @@ #include "compat_ptrace.h" #endif -DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); -DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); +#define CREATE_TRACE_POINTS +#include enum s390_regset { REGSET_GENERAL, @@ -665,7 +665,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->gprs[2]); if (unlikely(current->audit_context)) audit_syscall_entry(is_compat_task() ? 
@@ -683,7 +683,7 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) regs->gprs[2]); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_exit(regs, regs->gprs[2]); + trace_sys_exit(regs, regs->gprs[2]); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 31e9b97ec4d..8d7d5c9c1be 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -35,13 +35,11 @@ #include #include -#include - -DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); -DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); - #include "tls.h" +#define CREATE_TRACE_POINTS +#include + enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1501,7 +1499,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) ret = -1L; if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_enter(regs, regs->orig_ax); + trace_sys_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1527,7 +1525,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_exit(regs, regs->ax); + trace_sys_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); -- cgit v1.2.3 From ab94fcf528d127fcb490175512a8910f37e5b346 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 25 Aug 2009 16:47:16 -0700 Subject: x86: allow "=rm" in native_save_fl() This is a partial revert of f1f029c7bfbf4ee1918b90a431ab823bed812504. "=rm" is allowed in this context, because "pop" is explicitly defined to adjust the stack pointer *before* it evaluates its effective address, if it has one. Thus, we do end up writing to the correct address even if we use an on-stack memory argument. The original reporter for f1f029c7bfbf4ee1918b90a431ab823bed812504 was apparently using a broken x86 simulator. [ Impact: performance ] Signed-off-by: H. Peter Anvin Cc: Gabe Black --- arch/x86/include/asm/irqflags.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c6ccbe7e81a..9e2b952f810 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,14 +13,13 @@ static inline unsigned long native_save_fl(void) unsigned long flags; /* - * Note: this needs to be "=r" not "=rm", because we have the - * stack offset from what gcc expects at the time the "pop" is - * executed, and so a memory reference with respect to the stack - * would end up using the wrong address. + * "=rm" is safe here, because "pop" adjusts the stack before + * it evaluates its effective address -- this is part of the + * documented behavior of the "pop" instruction. */ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" - : "=r" (flags) + : "=rm" (flags) : /* no input */ : "memory"); -- cgit v1.2.3 From 8f3e1df48baf728bbb0f242c9dff9c9d7108218a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:36 +0400 Subject: x86, ioapic: Define IO_APIC_DEFAULT_PHYS_BASE constant We already have APIC_DEFAULT_PHYS_BASE so just to be consistent. 
Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090824175550.927946757@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 3 ++- arch/x86/kernel/mpparse.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 7ddb36ab933..7386bfa4f4b 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -8,7 +8,8 @@ * Ingo Molnar , 1999, 2000 */ -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 +#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 #define APIC_ID 0x20 diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 651c93b2886..fcd513bf284 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type) MP_bus_info(&bus); } - ioapic.type = MP_IOAPIC; - ioapic.apicid = 2; - ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.flags = MPC_APIC_USABLE; - ioapic.apicaddr = 0xFEC00000; + ioapic.type = MP_IOAPIC; + ioapic.apicid = 2; + ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.flags = MPC_APIC_USABLE; + ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; MP_ioapic_info(&ioapic); /* -- cgit v1.2.3 From ffc438366c2660a6a811b94ba33229bf217f8254 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:39 +0400 Subject: x86, ioapic: Get rid of needless check and simplify ioapic_setup_resources() alloc_bootmem() already panics on allocation failure. There is no need to check the result. The global variable can also be decoupled from the function body and passed in as a parameter, which allows us to simplify ioapic_init_mappings() as well -- the "for" loop already uses nr_ioapics as its loop condition, so there is no need to check whether ioapic_setup_resources() returned NULL again.
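(Sketch for illustration, not kernel code: the general pattern is that once the allocator handles failure itself, the caller's NULL checks are dead code. A userspace analogue, with the invented xmalloc() standing in for alloc_bootmem():)

	#include <stdio.h>
	#include <stdlib.h>

	/* like alloc_bootmem(): never returns NULL, aborts on failure */
	static void *xmalloc(size_t n)
	{
		void *p = malloc(n);

		if (!p) {
			fprintf(stderr, "out of memory\n");
			exit(1);
		}
		return p;
	}

	int main(void)
	{
		char *name = xmalloc(16);	/* no "if (name != NULL)" needed */

		sprintf(name, "IOAPIC %u", 0);
		puts(name);
		free(name);
		return 0;
	}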
Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090824175551.493629148@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2999f3dd588..d836b4d347e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4053,7 +4053,7 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(void) +static struct resource * __init ioapic_setup_resources(int nr_ioapics) { unsigned long n; struct resource *res; @@ -4069,15 +4069,13 @@ static struct resource * __init ioapic_setup_resources(void) mem = alloc_bootmem(n); res = (void *)mem; - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * nr_ioapics; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; } ioapic_resources = res; @@ -4091,7 +4089,7 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(); + ioapic_res = ioapic_setup_resources(nr_ioapics); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].apicaddr; @@ -4120,11 +4118,9 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; } } -- cgit v1.2.3 From 5051fd69773d2d044734b78516317a04d3774871 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 24 Aug 2009 21:53:37 +0400 Subject: x86, e820: Guard against array overflow in __e820_add_region() Better to be paranoid against unpredicted nr_map modifications. Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090824175551.146070377@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7271fa33d79..2e5e0faa99b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, { int x = e820x->nr_map; - if (x == ARRAY_SIZE(e820x->map)) { + if (x >= ARRAY_SIZE(e820x->map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } -- cgit v1.2.3 From d3a247bfb2c26f5b67367d58af7ad8c2efbbc6c1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 26 Aug 2009 21:13:24 +0400 Subject: x86, apic: Slim down stack usage in early_init_lapic_mapping() As far as I can see there is no external poking of mp_lapic_addr in this procedure which could lead to unpredicted changes and require local storage for it. Let's just use it directly.
Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20090826171324.GC4548@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3fc3a6c428d..159740decc4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1562,8 +1562,6 @@ no_apic: #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) { - unsigned long phys_addr; - /* * If no local APIC can be found then go out * : it means there is no mpatable and MADT @@ -1571,11 +1569,9 @@ void __init early_init_lapic_mapping(void) if (!smp_found_config) return; - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); + APIC_BASE, mp_lapic_addr); /* * Fetch the APIC ID of the BSP in case we have a -- cgit v1.2.3 From 7515bf59f87f19b2a17972b74230d2f91756fe3c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 25 Aug 2009 14:31:11 +0200 Subject: tracing: Add syscall tracepoints - s390 arch update This patch includes s390 arch updates to synchronize with latest core changes in the syscalls tracing area. - tracing: Map syscall name to number (syscall_name_to_nr()) - tracing: Call arch_init_ftrace_syscalls at boot - tracing: add support tracepoint ids (set_syscall_{enter,exit}_id()) Signed-off-by: Hendrik Brueckner Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Martin Schwidefsky Cc: Paul Mundt LKML-Reference: <20090825123111.GD4639@cetus.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ftrace.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 3e298e64f0d..57bdcb1e3cd 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -220,6 +220,29 @@ struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + for (i = 0; i < NR_syscalls; i++) + if (syscalls_metadata[i]) + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static struct syscall_metadata *find_syscall_meta(unsigned long syscall) { struct syscall_metadata *start; @@ -237,24 +260,19 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) return NULL; } -void arch_init_ftrace_syscalls(void) +static int __init arch_init_ftrace_syscalls(void) { struct syscall_metadata *meta; int i; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto out; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) - goto out; + return -ENOMEM; for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta((unsigned long)sys_call_table[i]); syscalls_metadata[i] = meta; } - return; -out: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif -- cgit v1.2.3 From dd86dda24cc1dc70031a7d9250dc3c0c430a50e2 
Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:14 -0400 Subject: tracing: Define NR_syscalls for x86 (32) Add a NR_syscalls #define for x86. This is used in the syscall events tracing code. Todo: make it dynamic like x86_64. NR_syscalls is the usual name used to determine the number of syscalls supported by the current arch. We want to unify the use of this number across archs that support syscall tracing. This also prepares for moving some of the arch code to core code in the syscall tracing area. Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <0f33c0f96d198fccc3ddd9ff7f5334ff5cb42706.1251146513.git.jbaron@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/unistd_32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 732a3070615..8deaada61bc 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -345,6 +345,8 @@ #ifdef __KERNEL__ +#define NR_syscalls 337 + #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT -- cgit v1.2.3 From a5a2f8e2acb991327952c45a13f5441fc09dffd6 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 26 Aug 2009 12:09:10 -0400 Subject: tracing: Define NR_syscalls for x86_64 Express the available number of syscalls in a standard way by defining NR_syscalls. The common way to define it is to place its definition in asm/unistd.h. However, on x86-64 the number of syscalls is defined using __NR_syscall_max, which only exists after the dynamic header file "asm-offsets.h" has been built. The source file that generates this header, asm-offsets-64.c, itself includes unistd.h, so if we want to derive NR_syscalls from __NR_syscall_max in unistd.h only after the dynamic header has been generated, we need a guard. If unistd.h is included from asm-offsets-64.c, then we are in the middle of generating asm-offsets.h, which defines __NR_syscall_max; at this point we don't want to (and can't) define NR_syscalls, so we do nothing. Otherwise we define NR_syscalls, because we know asm-offsets.h has been generated. Signed-off-by: Jason Baron Acked-by: Steven Rostedt Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H.
Peter Anvin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <20090826160910.GB2658@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/unistd_64.h | 6 ++++++ arch/x86/kernel/asm-offsets_64.c | 1 + 2 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 900e1617e67..b9f3c60de5f 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -688,6 +688,12 @@ __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) #endif /* __NO_STUBS */ #ifdef __KERNEL__ + +#ifndef COMPILE_OFFSETS +#include +#define NR_syscalls (__NR_syscall_max + 1) +#endif + /* * "Conditional" syscalls * diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e12..4a6aeedcd96 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -3,6 +3,7 @@ * This code generates raw asm output which is post-processed to extract * and format the required data. */ +#define COMPILE_OFFSETS #include #include -- cgit v1.2.3 From 57421dbbdc932d65f0e6a41ebb027a2bfe3d0669 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:22 -0400 Subject: tracing: Convert event tracing code to use NR_syscalls Convert the syscalls event tracing code to use NR_syscalls, instead of FTRACE_SYSCALL_MAX. NR_syscalls is standard across most arches, and reduces code confusion/complexity. Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <9b4f1a84ecae57cc6599412772efa36f0d2b815b.1251146513.git.jbaron@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3cff1214e17..9dbb527e165 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -494,7 +494,7 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) struct syscall_metadata *syscall_nr_to_meta(int nr) { - if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) return NULL; return syscalls_metadata[nr]; @@ -507,7 +507,7 @@ int syscall_name_to_nr(char *name) if (!syscalls_metadata) return -1; - for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + for (i = 0; i < NR_syscalls; i++) { if (syscalls_metadata[i]) { if (!strcmp(syscalls_metadata[i]->name, name)) return i; @@ -533,13 +533,13 @@ static int __init arch_init_ftrace_syscalls(void) unsigned long **psys_syscall_table = &sys_call_table; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - FTRACE_SYSCALL_MAX, GFP_KERNEL); + NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); return -ENOMEM; } - for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta(psys_syscall_table[i]); syscalls_metadata[i] = meta; } -- cgit v1.2.3 From 117226d15850387b55fd01675917ee4fcb9699e8 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:26 -0400 Subject: tracing: Remove FTRACE_SYSCALL_MAX definitions Remove the FTRACE_SYSCALL_MAX definitions now that we have converted the syscall event tracing code to use NR_syscalls.
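(For illustration: a self-contained userspace model of the NR_syscalls-bounded lookups shown in the diff above. The metadata struct is reduced to two fields and all data is faked for the sketch:)

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define NR_syscalls 337	/* per-arch count; the x86-32 value is used here */

	struct syscall_metadata { const char *name; int nb_args; };
	static struct syscall_metadata **syscalls_metadata;	/* filled at init */

	static struct syscall_metadata *syscall_nr_to_meta(int nr)
	{
		if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
			return NULL;
		return syscalls_metadata[nr];
	}

	static int syscall_name_to_nr(const char *name)
	{
		for (int i = 0; i < NR_syscalls; i++)
			if (syscalls_metadata[i] &&
			    !strcmp(syscalls_metadata[i]->name, name))
				return i;
		return -1;
	}

	int main(void)
	{
		static struct syscall_metadata open_meta = { "sys_open", 3 };

		syscalls_metadata = calloc(NR_syscalls, sizeof(*syscalls_metadata));
		syscalls_metadata[5] = &open_meta;	/* pretend nr 5 is open */
		printf("%d %s\n", syscall_name_to_nr("sys_open"),
		       syscall_nr_to_meta(5)->name);
		return 0;
	}

Both directions of the mapping iterate or index strictly within [0, NR_syscalls), which is the whole point of standardizing the constant.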
Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/ftrace.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 71136545187..db24c2278be 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,13 +28,6 @@ #endif -/* FIXME: I don't want to stay hardcoded */ -#ifdef CONFIG_X86_64 -# define FTRACE_SYSCALL_MAX 299 -#else -# define FTRACE_SYSCALL_MAX 337 -#endif - #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ -- cgit v1.2.3 From 2a4ab640d3c28c2952967e5f63ea495555bf2a5f Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Jul 2009 23:01:15 -0400 Subject: ACPI, x86: expose some IO-APIC routines when CONFIG_ACPI=n Some IO-APIC routines are ACPI specific now, but need to be exposed when CONFIG_ACPI=n for the benefit of SFI. Remove #ifdef ACPI around these routines: io_apic_get_unique_id(int ioapic, int apic_id); io_apic_get_version(int ioapic); io_apic_get_redir_entries(int ioapic); Move these routines from ACPI-specific boot.c to io_apic.c: uniq_ioapic_id(u8 id) mp_find_ioapic() mp_find_ioapic_pin() mp_register_ioapic() Also, since uniq_ioapic_id() is now no longer static, rename it to io_apic_unique_id() for consistency with the other public io_apic routines. For simplicity, do not #ifdef the resulting code ACPI || SFI, though that could be done in the future if it is important to optimize the !ACPI !SFI IO-APIC x86 kernel for size.
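(Illustration only: the allocation idea behind io_apic_unique_id() on the 64-bit path, reduced to a userspace sketch with a plain bool array instead of a bitmap:)

	#include <stdio.h>
	#include <stdbool.h>

	#define MAX_IDS 256

	static bool used[MAX_IDS];

	static int unique_id(int id)
	{
		if (!used[id])
			return id;		/* requested id still free */
		for (int i = 0; i < MAX_IDS; i++)
			if (!used[i])
				return i;	/* else first unused id */
		return -1;			/* all exhausted */
	}

	int main(void)
	{
		used[2] = true;			/* pretend id 2 is taken */
		printf("%d\n", unique_id(2));	/* falls back to 0 */
		printf("%d\n", unique_id(5));	/* 5 is free, keep it */
		return 0;
	}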
Signed-off-by: Feng Tang Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/io_apic.h | 13 +++++- arch/x86/kernel/acpi/boot.c | 102 +--------------------------------------- arch/x86/kernel/apic/io_apic.c | 103 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 109 insertions(+), 109 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 330ee807f89..85232d32fcb 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -150,11 +150,10 @@ extern int timer_through_8259; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -#ifdef CONFIG_ACPI +extern u8 io_apic_unique_id(u8 id); extern int io_apic_get_unique_id(int ioapic, int apic_id); extern int io_apic_get_version(int ioapic); extern int io_apic_get_redir_entries(int ioapic); -#endif /* CONFIG_ACPI */ struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, @@ -177,6 +176,16 @@ extern int setup_ioapic_entry(int apic, int irq, int polarity, int vector, int pin); extern void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e); + +struct mp_ioapic_gsi{ + int gsi_base; + int gsi_end; +}; +extern struct mp_ioapic_gsi mp_gsi_routing[]; +int mp_find_ioapic(int gsi); +int mp_find_ioapic_pin(int ioapic, int gsi); +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a0285..7e62d1eb8e3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void) extern int es7000_plat; #endif -static struct { - int gsi_base; - int gsi_end; -} mp_ioapic_routing[MAX_IO_APICS]; - -int mp_find_ioapic(int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. 
*/ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -int mp_find_ioapic_pin(int ioapic, int gsi) -{ - if (WARN_ON(ioapic == -1)) - return -1; - if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) - return -1; - - return gsi - mp_ioapic_routing[ioapic].gsi_base; -} - -static u8 __init uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -static int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); - return 1; - } - return 0; -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - mp_ioapics[idx].type = MP_IOAPIC; - mp_ioapics[idx].flags = MPC_APIC_USABLE; - mp_ioapics[idx].apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].apicid = uniq_ioapic_id(id); - mp_ioapics[idx].apicver = io_apic_get_version(idx); - - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
- */ - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, - mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} - int __init acpi_probe_gsi(void) { int idx; @@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void) max_gsi = 0; for (idx = 0; idx < nr_ioapics; idx++) { - gsi = mp_ioapic_routing[idx].gsi_end; + gsi = mp_gsi_routing[idx].gsi_end; if (gsi > max_gsi) max_gsi = gsi; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5ddc8..a8c0232b389 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -85,6 +85,9 @@ int nr_ioapic_registers[MAX_IO_APICS]; struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; +/* IO APIC gsi routing info */ +struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; + /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -3941,11 +3944,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq, return __io_apic_set_pci_routing(dev, irq, irq_attr); } -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ +u8 __init io_apic_unique_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); -#ifdef CONFIG_ACPI + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} #ifdef CONFIG_X86_32 int __init io_apic_get_unique_id(int ioapic, int apic_id) @@ -4054,8 +4074,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return 0; } -#endif /* CONFIG_ACPI */ - /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online @@ -4201,3 +4219,76 @@ void __init ioapic_insert_resources(void) r++; } } + +int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. 
*/ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_gsi_routing[i].gsi_base) + && (gsi <= mp_gsi_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_gsi_routing[ioapic].gsi_base; +} + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " + "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); + return 1; + } + if (!address) { + printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].apicid = io_apic_unique_id(id); + mp_ioapics[idx].apicver = io_apic_get_version(idx); + + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_gsi_routing[idx].gsi_base = gsi_base; + mp_gsi_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, + mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); + + nr_ioapics++; +} -- cgit v1.2.3 From 4923576b8ac5bfd36ab2beb176aeb747aaab7e41 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 27 Aug 2009 23:25:03 +0000 Subject: net: sh_eth: add value of ether_link pin in platform_data How the ETHER_LINK pin is handled is board dependent. This patch adds the following parameters:
- no_ether_link : If set to 1, do not use ETHER_LINK
- ether_link_active_low : If set to 1, ETHER_LINK is active low.
Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- arch/sh/include/asm/sh_eth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/sh/include/asm/sh_eth.h b/arch/sh/include/asm/sh_eth.h index bb832584f3c..acf99700dee 100644 --- a/arch/sh/include/asm/sh_eth.h +++ b/arch/sh/include/asm/sh_eth.h @@ -6,6 +6,9 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; struct sh_eth_plat_data { int phy; int edmac_endian; + + unsigned no_ether_link:1; + unsigned ether_link_active_low:1; }; #endif -- cgit v1.2.3 From 23386d63bbb3199cf247313ec088878d72debcfd Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Sat, 29 Aug 2009 18:27:18 +0200 Subject: x86: Detect stack protector for i386 builds on x86_64 Stack protector support was not detected when building with ARCH=i386 on x86_64 systems: arch/x86/Makefile:80: stack protector enabled but no compiler support The "-m32" argument needs to be passed to the detection script.
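(For context, a minimal userspace program of the kind such detection effectively exercises; not part of the patch. Built with -fstack-protector, and with -m32 where i386 is the target, the overflowing strcpy() trips the canary check and aborts instead of silently corrupting the stack:)

	#include <string.h>
	#include <stdio.h>

	/* deliberately unsafe copy: the protector inserts a canary here */
	static void copy(const char *src)
	{
		char buf[8];

		strcpy(buf, src);
		puts(buf);
	}

	int main(int argc, char **argv)
	{
		copy(argc > 1 ? argv[1] : "ok");
		return 0;
	}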
Signed-off-by: Michal Schmidt Cc: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Arjan van de Ven LKML-Reference: <20090829182718.10f566b1@leela> Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659c41b..5e7db44d709 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -72,7 +72,7 @@ endif ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) stackp-y := -fstack-protector stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all KBUILD_CFLAGS += $(stackp-y) -- cgit v1.2.3 From ed24157ede901608e00f28b4897398a373e1e926 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 27 Aug 2009 07:35:50 +0000 Subject: powerpc/qe: Implement qe_alive_during_sleep() helper function In some CPUs (e.g. MPC8569) the QE shuts down completely during sleep, and drivers may want to know that so they can reinitialize registers and buffer descriptors. This patch implements the qe_alive_during_sleep() helper function; so far it just checks whether an MPC8569-compatible power management controller is present, which is a sign that the QE turns off during sleep. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- arch/powerpc/include/asm/qe.h | 1 + arch/powerpc/sysdev/qe_lib/qe.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h index 157c5ca581c..f388f0ab193 100644 --- a/arch/powerpc/include/asm/qe.h +++ b/arch/powerpc/include/asm/qe.h @@ -154,6 +154,7 @@ int qe_get_snum(void); void qe_put_snum(u8 snum); unsigned int qe_get_num_of_risc(void); unsigned int qe_get_num_of_snums(void); +int qe_alive_during_sleep(void); /* we actually use cpm_muram implementation, define this for convenience */ #define qe_muram_init cpm_muram_init diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 237e3654f48..464271bea6c 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -65,6 +65,19 @@ static unsigned int qe_num_of_snum; static phys_addr_t qebase = -1; +int qe_alive_during_sleep(void) +{ + static int ret = -1; + + if (ret != -1) + return ret; + + ret = !of_find_compatible_node(NULL, NULL, "fsl,mpc8569-pmc"); + + return ret; +} +EXPORT_SYMBOL(qe_alive_during_sleep); + phys_addr_t get_qe_base(void) { struct device_node *qe; -- cgit v1.2.3 From 8307a98097222f4d9c2e62ebccd6f5df439328de Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:31 +0200 Subject: locking, powerpc: Rename __spin_try_lock() and friends Needed to avoid namespace conflicts when the common code function bodies of _spin_try_lock() etc. are moved to a header file where the function name would be __spin_try_lock().
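(Sketch, not the powerpc implementation: the renamed arch_*_trylock() helpers remain the primitive that the higher-level lock loops spin on. In portable C11 atomics the same pattern looks roughly like this, with all names invented for the example:)

	#include <stdatomic.h>
	#include <stdio.h>

	typedef struct { atomic_flag locked; } spinlock;

	/* returns 0 on success, non-zero if already held, mirroring the
	 * arch_spin_trylock() convention in the diff below */
	static int spin_trylock(spinlock *l)
	{
		return atomic_flag_test_and_set_explicit(&l->locked,
							 memory_order_acquire);
	}

	static void spin_lock(spinlock *l)
	{
		while (spin_trylock(l))
			;	/* the kernel would cpu_relax() here */
	}

	static void spin_unlock(spinlock *l)
	{
		atomic_flag_clear_explicit(&l->locked, memory_order_release);
	}

	int main(void)
	{
		spinlock l = { ATOMIC_FLAG_INIT };

		spin_lock(&l);
		puts("locked");
		spin_unlock(&l);
		return 0;
	}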
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124415.918799705@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/spinlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index c3b193121f8..198266cf9e2 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -54,7 +54,7 @@ * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. */ -static inline unsigned long __spin_trylock(raw_spinlock_t *lock) +static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock) { unsigned long tmp, token; @@ -76,7 +76,7 @@ static inline unsigned long __spin_trylock(raw_spinlock_t *lock) static inline int __raw_spin_trylock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; - return __spin_trylock(lock) == 0; + return arch_spin_trylock(lock) == 0; } /* @@ -108,7 +108,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; do { HMT_low(); @@ -126,7 +126,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; local_save_flags(flags_dis); local_irq_restore(flags); @@ -181,7 +181,7 @@ extern void __raw_spin_unlock_wait(raw_spinlock_t *lock); * This returns the old value in the lock + 1, * so we got a read lock if the return value is > 0. */ -static inline long __read_trylock(raw_rwlock_t *rw) +static inline long arch_read_trylock(raw_rwlock_t *rw) { long tmp; @@ -205,7 +205,7 @@ static inline long __read_trylock(raw_rwlock_t *rw) * This returns the old value in the lock, * so we got the write lock if the return value is 0. */ -static inline long __write_trylock(raw_rwlock_t *rw) +static inline long arch_write_trylock(raw_rwlock_t *rw) { long tmp, token; @@ -228,7 +228,7 @@ static inline long __write_trylock(raw_rwlock_t *rw) static inline void __raw_read_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__read_trylock(rw) > 0)) + if (likely(arch_read_trylock(rw) > 0)) break; do { HMT_low(); @@ -242,7 +242,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) static inline void __raw_write_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__write_trylock(rw) == 0)) + if (likely(arch_write_trylock(rw) == 0)) break; do { HMT_low(); @@ -255,12 +255,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) static inline int __raw_read_trylock(raw_rwlock_t *rw) { - return __read_trylock(rw) > 0; + return arch_read_trylock(rw) > 0; } static inline int __raw_write_trylock(raw_rwlock_t *rw) { - return __write_trylock(rw) == 0; + return arch_write_trylock(rw) == 0; } static inline void __raw_read_unlock(raw_rwlock_t *rw) -- cgit v1.2.3 From 9f34ceb60357a7166c929d7b52bd057ad7ffc54b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:32 +0200 Subject: locking, sparc: Rename __spin_try_lock() and friends Needed to avoid namespace conflicts when the common code function bodies of _spin_try_lock() etc. 
are moved to a header file where the function name would be __spin_try_lock(). Signed-off-by: Heiko Carstens Acked-by: David S. Miller Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124416.306495811@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/spinlock_32.h | 12 ++++++------ arch/sparc/include/asm/spinlock_64.h | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 46f91ab66a5..857630cff63 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -76,7 +76,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) * * Unfortunately this scheme limits us to ~16,000,000 cpus. */ -static inline void __read_lock(raw_rwlock_t *rw) +static inline void arch_read_lock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; @@ -92,11 +92,11 @@ static inline void __read_lock(raw_rwlock_t *rw) #define __raw_read_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - __read_lock(lock); \ + arch_read_lock(lock); \ local_irq_restore(flags); \ } while(0) -static inline void __read_unlock(raw_rwlock_t *rw) +static inline void arch_read_unlock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; @@ -112,7 +112,7 @@ static inline void __read_unlock(raw_rwlock_t *rw) #define __raw_read_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - __read_unlock(lock); \ + arch_read_unlock(lock); \ local_irq_restore(flags); \ } while(0) @@ -150,7 +150,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) return (val == 0); } -static inline int __read_trylock(raw_rwlock_t *rw) +static inline int arch_read_trylock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); register int res asm("o0"); @@ -169,7 +169,7 @@ static inline int __read_trylock(raw_rwlock_t *rw) ({ unsigned long flags; \ int res; \ local_irq_save(flags); \ - res = __read_trylock(lock); \ + res = arch_read_trylock(lock); \ local_irq_restore(flags); \ res; \ }) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index f6b2b92ad8d..43e51478358 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -92,7 +92,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... 
*/ -static void inline __read_lock(raw_rwlock_t *lock) +static void inline arch_read_lock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -115,7 +115,7 @@ static void inline __read_lock(raw_rwlock_t *lock) : "memory"); } -static int inline __read_trylock(raw_rwlock_t *lock) +static int inline arch_read_trylock(raw_rwlock_t *lock) { int tmp1, tmp2; @@ -136,7 +136,7 @@ static int inline __read_trylock(raw_rwlock_t *lock) return tmp1; } -static void inline __read_unlock(raw_rwlock_t *lock) +static void inline arch_read_unlock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -152,7 +152,7 @@ static void inline __read_unlock(raw_rwlock_t *lock) : "memory"); } -static void inline __write_lock(raw_rwlock_t *lock) +static void inline arch_write_lock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -177,7 +177,7 @@ static void inline __write_lock(raw_rwlock_t *lock) : "memory"); } -static void inline __write_unlock(raw_rwlock_t *lock) +static void inline arch_write_unlock(raw_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -186,7 +186,7 @@ static void inline __write_unlock(raw_rwlock_t *lock) : "memory"); } -static int inline __write_trylock(raw_rwlock_t *lock) +static int inline arch_write_trylock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; @@ -210,14 +210,14 @@ static int inline __write_trylock(raw_rwlock_t *lock) return result; } -#define __raw_read_lock(p) __read_lock(p) -#define __raw_read_lock_flags(p, f) __read_lock(p) -#define __raw_read_trylock(p) __read_trylock(p) -#define __raw_read_unlock(p) __read_unlock(p) -#define __raw_write_lock(p) __write_lock(p) -#define __raw_write_lock_flags(p, f) __write_lock(p) -#define __raw_write_unlock(p) __write_unlock(p) -#define __raw_write_trylock(p) __write_trylock(p) +#define __raw_read_lock(p) arch_read_lock(p) +#define __raw_read_lock_flags(p, f) arch_read_lock(p) +#define __raw_read_trylock(p) arch_read_trylock(p) +#define __raw_read_unlock(p) arch_read_unlock(p) +#define __raw_write_lock(p) arch_write_lock(p) +#define __raw_write_lock_flags(p, f) arch_write_lock(p) +#define __raw_write_unlock(p) arch_write_unlock(p) +#define __raw_write_trylock(p) arch_write_trylock(p) #define __raw_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) #define __raw_write_can_lock(rw) (!(rw)->lock) -- cgit v1.2.3 From f159ee782990aacb5494738c98f13a2aa61dbb4a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:33 +0200 Subject: locking, m68k/asm-offsets: Rename pt_regs offset defines In order to be able to use asm-offsets.h in C files the existing namespace conflicts must be solved first. In asm-offsets.h e.g. PT_D0 gets defined which is the offset of the d0 member of the pt_regs structure. However a same define (with a different meaning) exists in asm/ptregs.h. So rename the defines created with the asm-offset mechanism to PT_OFF_D0 etc. There also already exist a few defines with these names that have the same meaning. So remove the existing defines and use the asm-offset generated ones. 
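(Illustration of the asm-offsets mechanism itself, reduced to a toy userspace generator: the real kernel emits the values as raw asm output and post-processes it, but the idea is just to print offsetof() results where assembly can see them. The struct layout below is a stand-in, not the real m68k pt_regs:)

	#include <stdio.h>
	#include <stddef.h>

	struct pt_regs { long d0, d1, orig_d0; };

	#define DEFINE(sym, val) \
		printf("#define %-16s %zu\n", #sym, (size_t)(val))

	int main(void)
	{
		DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0));
		DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1));
		DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0));
		return 0;
	}

Because the constants are derived from offsetof() at build time, renaming them is purely mechanical and the assembly sources cannot drift out of sync with the C struct layout.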
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124416.666403991@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/entry_mm.h | 3 --- arch/m68k/include/asm/entry_no.h | 8 ++++---- arch/m68k/include/asm/math-emu.h | 20 ++++++++++---------- arch/m68k/kernel/asm-offsets.c | 26 +++++++++++++------------- arch/m68k/kernel/entry.S | 22 +++++++++++----------- arch/m68k/math-emu/fp_entry.S | 30 +++++++++++++++--------------- 6 files changed, 53 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h index 5202f5a5b42..4fcfc28c061 100644 --- a/arch/m68k/include/asm/entry_mm.h +++ b/arch/m68k/include/asm/entry_mm.h @@ -118,9 +118,6 @@ PT_DTRACE_BIT = 2 #define STR(X) STR1(X) #define STR1(X) #X -#define PT_OFF_ORIG_D0 0x24 -#define PT_OFF_FORMATVEC 0x32 -#define PT_OFF_SR 0x2C #define SAVE_ALL_INT \ "clrl %%sp@-;" /* stk_adj */ \ "pea -1:w;" /* orig d0 = -1 */ \ diff --git a/arch/m68k/include/asm/entry_no.h b/arch/m68k/include/asm/entry_no.h index c2553d26273..907ed03d792 100644 --- a/arch/m68k/include/asm/entry_no.h +++ b/arch/m68k/include/asm/entry_no.h @@ -72,8 +72,8 @@ LENOSYS = 38 lea %sp@(-32),%sp /* space for 8 regs */ moveml %d1-%d5/%a0-%a2,%sp@ movel sw_usp,%a0 /* get usp */ - movel %a0@-,%sp@(PT_PC) /* copy exception program counter */ - movel %a0@-,%sp@(PT_FORMATVEC)/* copy exception format/vector/sr */ + movel %a0@-,%sp@(PT_OFF_PC) /* copy exception program counter */ + movel %a0@-,%sp@(PT_OFF_FORMATVEC)/*copy exception format/vector/sr */ bra 7f 6: clrl %sp@- /* stkadj */ @@ -89,8 +89,8 @@ LENOSYS = 38 bnes 8f /* no, skip */ move #0x2700,%sr /* disable intrs */ movel sw_usp,%a0 /* get usp */ - movel %sp@(PT_PC),%a0@- /* copy exception program counter */ - movel %sp@(PT_FORMATVEC),%a0@-/* copy exception format/vector/sr */ + movel %sp@(PT_OFF_PC),%a0@- /* copy exception program counter */ + movel %sp@(PT_OFF_FORMATVEC),%a0@-/*copy exception format/vector/sr */ moveml %sp@,%d1-%d5/%a0-%a2 lea %sp@(32),%sp /* space for 8 regs */ movel %sp@+,%d0 diff --git a/arch/m68k/include/asm/math-emu.h b/arch/m68k/include/asm/math-emu.h index ddfab96403c..5e9249b0014 100644 --- a/arch/m68k/include/asm/math-emu.h +++ b/arch/m68k/include/asm/math-emu.h @@ -145,16 +145,16 @@ extern unsigned int fp_debugprint; * these are only used during instruction decoding * where we always know how deep we're on the stack. 
*/ -#define FPS_DO (PT_D0) -#define FPS_D1 (PT_D1) -#define FPS_D2 (PT_D2) -#define FPS_A0 (PT_A0) -#define FPS_A1 (PT_A1) -#define FPS_A2 (PT_A2) -#define FPS_SR (PT_SR) -#define FPS_PC (PT_PC) -#define FPS_EA (PT_PC+6) -#define FPS_PC2 (PT_PC+10) +#define FPS_DO (PT_OFF_D0) +#define FPS_D1 (PT_OFF_D1) +#define FPS_D2 (PT_OFF_D2) +#define FPS_A0 (PT_OFF_A0) +#define FPS_A1 (PT_OFF_A1) +#define FPS_A2 (PT_OFF_A2) +#define FPS_SR (PT_OFF_SR) +#define FPS_PC (PT_OFF_PC) +#define FPS_EA (PT_OFF_PC+6) +#define FPS_PC2 (PT_OFF_PC+10) .macro fp_get_fp_reg lea (FPD_FPREG,FPDATA,%d0.w*4),%a0 diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index b1f012f6c49..0cffab8d244 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -44,20 +44,20 @@ int main(void) DEFINE(TINFO_FLAGS, offsetof(struct thread_info, flags)); /* offsets into the pt_regs */ - DEFINE(PT_D0, offsetof(struct pt_regs, d0)); - DEFINE(PT_ORIG_D0, offsetof(struct pt_regs, orig_d0)); - DEFINE(PT_D1, offsetof(struct pt_regs, d1)); - DEFINE(PT_D2, offsetof(struct pt_regs, d2)); - DEFINE(PT_D3, offsetof(struct pt_regs, d3)); - DEFINE(PT_D4, offsetof(struct pt_regs, d4)); - DEFINE(PT_D5, offsetof(struct pt_regs, d5)); - DEFINE(PT_A0, offsetof(struct pt_regs, a0)); - DEFINE(PT_A1, offsetof(struct pt_regs, a1)); - DEFINE(PT_A2, offsetof(struct pt_regs, a2)); - DEFINE(PT_PC, offsetof(struct pt_regs, pc)); - DEFINE(PT_SR, offsetof(struct pt_regs, sr)); + DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0)); + DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0)); + DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1)); + DEFINE(PT_OFF_D2, offsetof(struct pt_regs, d2)); + DEFINE(PT_OFF_D3, offsetof(struct pt_regs, d3)); + DEFINE(PT_OFF_D4, offsetof(struct pt_regs, d4)); + DEFINE(PT_OFF_D5, offsetof(struct pt_regs, d5)); + DEFINE(PT_OFF_A0, offsetof(struct pt_regs, a0)); + DEFINE(PT_OFF_A1, offsetof(struct pt_regs, a1)); + DEFINE(PT_OFF_A2, offsetof(struct pt_regs, a2)); + DEFINE(PT_OFF_PC, offsetof(struct pt_regs, pc)); + DEFINE(PT_OFF_SR, offsetof(struct pt_regs, sr)); /* bitfields are a bit difficult */ - DEFINE(PT_VECTOR, offsetof(struct pt_regs, pc) + 4); + DEFINE(PT_OFF_FORMATVEC, offsetof(struct pt_regs, pc) + 4); /* offsets into the irq_handler struct */ DEFINE(IRQ_HANDLER, offsetof(struct irq_node, handler)); diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index c3735cd6207..922f52e7ed1 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -77,17 +77,17 @@ ENTRY(ret_from_fork) jra .Lret_from_exception do_trace_entry: - movel #-ENOSYS,%sp@(PT_D0) | needed for strace + movel #-ENOSYS,%sp@(PT_OFF_D0)| needed for strace subql #4,%sp SAVE_SWITCH_STACK jbsr syscall_trace RESTORE_SWITCH_STACK addql #4,%sp - movel %sp@(PT_ORIG_D0),%d0 + movel %sp@(PT_OFF_ORIG_D0),%d0 cmpl #NR_syscalls,%d0 jcs syscall badsys: - movel #-ENOSYS,%sp@(PT_D0) + movel #-ENOSYS,%sp@(PT_OFF_D0) jra ret_from_syscall do_trace_exit: @@ -103,7 +103,7 @@ ENTRY(ret_from_signal) addql #4,%sp /* on 68040 complete pending writebacks if any */ #ifdef CONFIG_M68040 - bfextu %sp@(PT_VECTOR){#0,#4},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#0,#4},%d0 subql #7,%d0 | bus error frame ? 
jbne 1f movel %sp,%sp@- @@ -127,7 +127,7 @@ ENTRY(system_call) jcc badsys syscall: jbsr @(sys_call_table,%d0:l:4)@(0) - movel %d0,%sp@(PT_D0) | save the return value + movel %d0,%sp@(PT_OFF_D0) | save the return value ret_from_syscall: |oriw #0x0700,%sr movew %curptr@(TASK_INFO+TINFO_FLAGS+2),%d0 @@ -135,7 +135,7 @@ ret_from_syscall: 1: RESTORE_ALL syscall_exit_work: - btst #5,%sp@(PT_SR) | check if returning to kernel + btst #5,%sp@(PT_OFF_SR) | check if returning to kernel bnes 1b | if so, skip resched, signals lslw #1,%d0 jcs do_trace_exit @@ -148,7 +148,7 @@ syscall_exit_work: ENTRY(ret_from_exception) .Lret_from_exception: - btst #5,%sp@(PT_SR) | check if returning to kernel + btst #5,%sp@(PT_OFF_SR) | check if returning to kernel bnes 1f | if so, skip resched, signals | only allow interrupts when we are really the last one on the | kernel stack, otherwise stack overflow can occur during @@ -182,7 +182,7 @@ do_signal_return: jbra resume_userspace do_delayed_trace: - bclr #7,%sp@(PT_SR) | clear trace bit in SR + bclr #7,%sp@(PT_OFF_SR) | clear trace bit in SR pea 1 | send SIGTRAP movel %curptr,%sp@- pea LSIGTRAP @@ -199,7 +199,7 @@ ENTRY(auto_inthandler) GET_CURRENT(%d0) addqb #1,%curptr@(TASK_INFO+TINFO_PREEMPT+1) | put exception # in d0 - bfextu %sp@(PT_VECTOR){#4,#10},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#4,#10},%d0 subw #VEC_SPUR,%d0 movel %sp,%sp@- @@ -216,7 +216,7 @@ ret_from_interrupt: ALIGN ret_from_last_interrupt: moveq #(~ALLOWINT>>8)&0xff,%d0 - andb %sp@(PT_SR),%d0 + andb %sp@(PT_OFF_SR),%d0 jne 2b /* check if we need to do software interrupts */ @@ -232,7 +232,7 @@ ENTRY(user_inthandler) GET_CURRENT(%d0) addqb #1,%curptr@(TASK_INFO+TINFO_PREEMPT+1) | put exception # in d0 - bfextu %sp@(PT_VECTOR){#4,#10},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#4,#10},%d0 user_irqvec_fixup = . 
+ 2 subw #VEC_USER,%d0 diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S index 954b4f304a7..916c6f3d1f0 100644 --- a/arch/m68k/math-emu/fp_entry.S +++ b/arch/m68k/math-emu/fp_entry.S @@ -122,17 +122,17 @@ fp_get_data_reg: .long fp_get_d6, fp_get_d7 fp_get_d0: - move.l (PT_D0+8,%sp),%d0 + move.l (PT_OFF_D0+8,%sp),%d0 printf PREGISTER,"{d0->%08x}",1,%d0 rts fp_get_d1: - move.l (PT_D1+8,%sp),%d0 + move.l (PT_OFF_D1+8,%sp),%d0 printf PREGISTER,"{d1->%08x}",1,%d0 rts fp_get_d2: - move.l (PT_D2+8,%sp),%d0 + move.l (PT_OFF_D2+8,%sp),%d0 printf PREGISTER,"{d2->%08x}",1,%d0 rts @@ -173,35 +173,35 @@ fp_put_data_reg: fp_put_d0: printf PREGISTER,"{d0<-%08x}",1,%d0 - move.l %d0,(PT_D0+8,%sp) + move.l %d0,(PT_OFF_D0+8,%sp) rts fp_put_d1: printf PREGISTER,"{d1<-%08x}",1,%d0 - move.l %d0,(PT_D1+8,%sp) + move.l %d0,(PT_OFF_D1+8,%sp) rts fp_put_d2: printf PREGISTER,"{d2<-%08x}",1,%d0 - move.l %d0,(PT_D2+8,%sp) + move.l %d0,(PT_OFF_D2+8,%sp) rts fp_put_d3: printf PREGISTER,"{d3<-%08x}",1,%d0 | move.l %d0,%d3 - move.l %d0,(PT_D3+8,%sp) + move.l %d0,(PT_OFF_D3+8,%sp) rts fp_put_d4: printf PREGISTER,"{d4<-%08x}",1,%d0 | move.l %d0,%d4 - move.l %d0,(PT_D4+8,%sp) + move.l %d0,(PT_OFF_D4+8,%sp) rts fp_put_d5: printf PREGISTER,"{d5<-%08x}",1,%d0 | move.l %d0,%d5 - move.l %d0,(PT_D5+8,%sp) + move.l %d0,(PT_OFF_D5+8,%sp) rts fp_put_d6: @@ -225,17 +225,17 @@ fp_get_addr_reg: .long fp_get_a6, fp_get_a7 fp_get_a0: - move.l (PT_A0+8,%sp),%a0 + move.l (PT_OFF_A0+8,%sp),%a0 printf PREGISTER,"{a0->%08x}",1,%a0 rts fp_get_a1: - move.l (PT_A1+8,%sp),%a0 + move.l (PT_OFF_A1+8,%sp),%a0 printf PREGISTER,"{a1->%08x}",1,%a0 rts fp_get_a2: - move.l (PT_A2+8,%sp),%a0 + move.l (PT_OFF_A2+8,%sp),%a0 printf PREGISTER,"{a2->%08x}",1,%a0 rts @@ -276,17 +276,17 @@ fp_put_addr_reg: fp_put_a0: printf PREGISTER,"{a0<-%08x}",1,%a0 - move.l %a0,(PT_A0+8,%sp) + move.l %a0,(PT_OFF_A0+8,%sp) rts fp_put_a1: printf PREGISTER,"{a1<-%08x}",1,%a0 - move.l %a0,(PT_A1+8,%sp) + move.l %a0,(PT_OFF_A1+8,%sp) rts fp_put_a2: printf PREGISTER,"{a2<-%08x}",1,%a0 - move.l %a0,(PT_A2+8,%sp) + move.l %a0,(PT_OFF_A2+8,%sp) rts fp_put_a3: -- cgit v1.2.3 From 0ee000e5e8fa2e5c760250be0d78d5906e3eb94b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:35 +0200 Subject: locking, m68k: Calculate thread_info offset with asm offset m68k has the thread_info structure embedded in its task struct. Therefore its not possible to implement current_thread_info() by looking at the stack pointer and do some simple calculations like most other architectures do it. To return the thread_info pointer for a task two defines are used. This works until the spinlock function bodies get moved into an own header file and CONFIG_SPINLOCK_DEBUG is turned on. That results into this compile error: In file included from include/linux/spinlock.h:378, from include/linux/seqlock.h:29, from include/linux/time.h:8, from include/linux/timex.h:56, from include/linux/sched.h:54, from arch/m68k/kernel/asm-offsets.c:12: include/linux/spinlock_api_smp.h: In function '__spin_unlock_irq': include/linux/spinlock_api_smp.h:371: error: 'current' undeclared (first use in this function) include/linux/spinlock_api_smp.h:371: error: (Each undeclared identifier is reported only once include/linux/spinlock_api_smp.h:371: error: for each function it appears in.) Including asm/current.h to asm-offsets.c wouldn't help since the definition of struct task is needed. So we end up with ugly header file include dependencies. 
To solve this calculate the offset of the thread_info structure into the task struct in asm-offsets.h and use the offset in task_thread_info(). This works just like it does for IA64 as well. Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124417.329662275@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/thread_info_mm.h | 11 ++++++++++- arch/m68k/kernel/asm-offsets.c | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/thread_info_mm.h b/arch/m68k/include/asm/thread_info_mm.h index 6ea5c33b3c5..b6da3882be9 100644 --- a/arch/m68k/include/asm/thread_info_mm.h +++ b/arch/m68k/include/asm/thread_info_mm.h @@ -1,6 +1,10 @@ #ifndef _ASM_M68K_THREAD_INFO_H #define _ASM_M68K_THREAD_INFO_H +#ifndef ASM_OFFSETS_C +#include +#endif +#include #include #include @@ -31,7 +35,12 @@ struct thread_info { #define init_thread_info (init_task.thread.info) #define init_stack (init_thread_union.stack) -#define task_thread_info(tsk) (&(tsk)->thread.info) +#ifdef ASM_OFFSETS_C +#define task_thread_info(tsk) ((struct thread_info *) NULL) +#else +#define task_thread_info(tsk) ((struct thread_info *)((char *)tsk+TASK_TINFO)) +#endif + #define task_stack_page(tsk) ((tsk)->stack) #define current_thread_info() task_thread_info(current) diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index 0cffab8d244..25d6b721522 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -8,6 +8,8 @@ * #defines from the assembly-language output. */ +#define ASM_OFFSETS_C + #include #include #include @@ -27,6 +29,9 @@ int main(void) DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info)); DEFINE(TASK_MM, offsetof(struct task_struct, mm)); DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); +#ifdef CONFIG_MMU + DEFINE(TASK_TINFO, offsetof(struct task_struct, thread.info)); +#endif /* offsets into the thread struct */ DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); -- cgit v1.2.3 From b62e180cae6bd82e246d871a1e44e03f8019d421 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:39 +0200 Subject: locking: Inline spinlock code for all locking variants on s390 Speeds up several benchmarks in a measurable way, so inline all spin-lock variants by default. 
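(Schematic model, not the exact kernel macros: the effect of the __always_inline__* defines is that common code maps the locking API to an inline body at every call site instead of an out-of-line function. Roughly, under that assumption:)

	#include <stdio.h>

	#define __always_inline__spin_lock	/* the arch opts in, as above */

	static inline void __spin_lock(int *l)
	{
		*l = 1;				/* stand-in for the real body */
	}

	#ifdef __always_inline__spin_lock
	#define spin_lock(l) __spin_lock(l)	/* inlined at every call site */
	#else
	void spin_lock(int *l);			/* out-of-line copy elsewhere */
	#endif

	int main(void)
	{
		int lock = 0;

		spin_lock(&lock);
		printf("lock = %d\n", lock);
		return 0;
	}

The trade-off is the usual one: inlining removes a call per lock operation at the cost of code size, which s390 measurements evidently favored.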
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124419.319518405@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/s390/include/asm/spinlock.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index c9af0d19c7a..41ce6861174 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,4 +191,33 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() +#define __always_inline__spin_lock +#define __always_inline__read_lock +#define __always_inline__write_lock +#define __always_inline__spin_lock_bh +#define __always_inline__read_lock_bh +#define __always_inline__write_lock_bh +#define __always_inline__spin_lock_irq +#define __always_inline__read_lock_irq +#define __always_inline__write_lock_irq +#define __always_inline__spin_lock_irqsave +#define __always_inline__read_lock_irqsave +#define __always_inline__write_lock_irqsave +#define __always_inline__spin_trylock +#define __always_inline__read_trylock +#define __always_inline__write_trylock +#define __always_inline__spin_trylock_bh +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_bh +#define __always_inline__read_unlock_bh +#define __always_inline__write_unlock_bh +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#define __always_inline__spin_unlock_irqrestore +#define __always_inline__read_unlock_irqrestore +#define __always_inline__write_unlock_irqrestore + #endif /* __ASM_SPINLOCK_H */ -- cgit v1.2.3 From 132ec92f3f70fe365c1f4b8d46e66cf8a2a16880 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:09 +0200 Subject: x86, msr: Add rd/wrmsr interfaces with preset registers native_{rdmsr,wrmsr}_safe_regs are two new interfaces which allow presetting of a subset of eight x86 GPRs before executing the rd/wrmsr instructions. This is needed at least on AMD K8 for accessing an erratum workaround MSR. Originally based on an idea by H. Peter Anvin. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-1-git-send-email-petkovbb@gmail.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msr.h | 13 ++++++ arch/x86/include/asm/paravirt.h | 16 +++++++ arch/x86/kernel/paravirt.c | 2 + arch/x86/lib/Makefile | 1 + arch/x86/lib/msr-reg.S | 98 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 arch/x86/lib/msr-reg.S (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 48ad9d29484..184d4a11396 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -113,6 +113,9 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern unsigned long long native_read_tsc(void); +extern int native_rdmsr_safe_regs(u32 *regs); +extern int native_wrmsr_safe_regs(u32 *regs); + static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); @@ -189,6 +192,16 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) return err; } +static inline int rdmsr_safe_regs(u32 *regs) +{ + return native_rdmsr_safe_regs(regs); +} + +static inline int wrmsr_safe_regs(u32 *regs) +{ + return native_wrmsr_safe_regs(regs); +} + #define rdtscl(low) \ ((low) = (u32)__native_read_tsc()) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8a083..1705944e037 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -168,7 +168,9 @@ struct pv_cpu_ops { err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); + int (*rdmsr_regs)(u32 *regs); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + int (*wrmsr_regs)(u32 *regs); u64 (*read_tsc)(void); u64 (*read_pmc)(int counter); @@ -820,6 +822,12 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); } + +static inline int paravirt_rdmsr_regs(u32 *regs) +{ + return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); +} + static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); @@ -829,6 +837,11 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); } +static inline int paravirt_wrmsr_regs(u32 *regs) +{ + return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); +} + /* These should all do BUG_ON(_err), but our headers are too tangled. 
*/ #define rdmsr(msr, val1, val2) \ do { \ @@ -862,6 +875,9 @@ do { \ _err; \ }) +#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) +#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) + static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b951d7..67594af43b3 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -362,8 +362,10 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .rdmsr_regs = native_rdmsr_safe_regs, .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, + .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, .read_tscp = native_read_tscp, diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c31899c9c..b59c0647d80 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -8,6 +8,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += msr-reg.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S new file mode 100644 index 00000000000..51f1bb3f8c7 --- /dev/null +++ b/arch/x86/lib/msr-reg.S @@ -0,0 +1,98 @@ +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +/* + * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); + * + * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] + * + */ +.macro op_safe_regs op:req +ENTRY(native_\op\()_safe_regs) + push %rbx + push %rbp + push $0 /* Return value */ + push %rdi + movl (%rdi), %eax + movl 4(%rdi), %ecx + movl 8(%rdi), %edx + movl 12(%rdi), %ebx + movl 20(%rdi), %ebp + movl 24(%rdi), %esi + movl 28(%rdi), %edi +1: \op +2: movl %edi, %r10d + pop %rdi + movl %eax, (%rdi) + movl %ecx, 4(%rdi) + movl %edx, 8(%rdi) + movl %ebx, 12(%rdi) + movl %ebp, 20(%rdi) + movl %esi, 24(%rdi) + movl %r10d, 28(%rdi) + pop %rax + pop %rbp + pop %rbx + ret +3: + movq $-EIO, 8(%rsp) + jmp 2b + .section __ex_table,"ax" + .balign 4 + .quad 1b, 3b + .previous +ENDPROC(native_\op\()_safe_regs) +.endm + +#else /* X86_32 */ + +.macro op_safe_regs op:req +ENTRY(native_\op\()_safe_regs) + push %ebx + push %ebp + push %esi + push %edi + push $0 /* Return value */ + push %eax + movl 4(%eax), %ecx + movl 8(%eax), %edx + movl 12(%eax), %ebx + movl 20(%eax), %ebp + movl 24(%eax), %esi + movl 28(%eax), %edi + movl (%eax), %eax +1: \op +2: push %eax + movl 4(%esp), %eax + pop (%eax) + addl $4, %esp + movl %ecx, 4(%eax) + movl %edx, 8(%eax) + movl %ebx, 12(%eax) + movl %ebp, 20(%eax) + movl %esi, 24(%eax) + movl %edi, 28(%eax) + pop %eax + pop %edi + pop %esi + pop %ebp + pop %ebx + ret +3: + movl $-EIO, 4(%esp) + jmp 2b + .section __ex_table,"ax" + .balign 4 + .long 1b, 3b + .previous +ENDPROC(native_\op\()_safe_regs) +.endm + +#endif + +op_safe_regs rdmsr +op_safe_regs wrmsr + -- cgit v1.2.3 From 177fed1ee8d727c39601ce9fc2299b4cb25a718e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:10 +0200 Subject: x86, msr: Rewrite AMD rd/wrmsr variants Switch them to native_{rd,wr}msr_safe_regs and remove pv_cpu_ops.read_msr_amd. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-2-git-send-email-petkovbb@gmail.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msr.h | 38 +++++++++++++++++++++----------------- arch/x86/include/asm/paravirt.h | 26 ++++++++++++++++++++------ arch/x86/kernel/paravirt.c | 1 - 3 files changed, 41 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 184d4a11396..09c5ca70d49 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -71,22 +71,6 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } -static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, - int *err) -{ - DECLARE_ARGS(val, low, high); - - asm volatile("2: rdmsr ; xor %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %3,%0 ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) - : "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); - return EAX_EDX_VAL(val, low, high); -} - static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { @@ -184,14 +168,34 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = native_read_msr_safe(msr, &err); return err; } + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { + u32 gprs[8] = { 0 }; int err; - *p = native_read_msr_amd_safe(msr, &err); + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = native_rdmsr_safe_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + return err; } +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + u32 gprs[8] = { 0 }; + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return native_wrmsr_safe_regs(gprs); +} + static inline int rdmsr_safe_regs(u32 *regs) { return native_rdmsr_safe_regs(regs); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1705944e037..11574934a99 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -166,7 +166,6 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. 
*/ - u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); int (*rdmsr_regs)(u32 *regs); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); @@ -828,10 +827,6 @@ static inline int paravirt_rdmsr_regs(u32 *regs) return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); } -static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) -{ - return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); -} static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); @@ -887,12 +882,31 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) } static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { + u32 gprs[8] = { 0 }; int err; - *p = paravirt_read_msr_amd(msr, &err); + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = paravirt_rdmsr_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + return err; } +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + u32 gprs[8] = { 0 }; + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return paravirt_wrmsr_regs(gprs); +} + static inline u64 paravirt_read_tsc(void) { return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 67594af43b3..f5b0b4a01fb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -363,7 +363,6 @@ struct pv_cpu_ops pv_cpu_ops = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .rdmsr_regs = native_rdmsr_safe_regs, - .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, -- cgit v1.2.3 From 6b0f43ddfa358dc71ad2a2d57bce5906c1c5dc1a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 31 Aug 2009 09:50:11 +0200 Subject: x86, AMD: Disable wrongly set X86_FEATURE_LAHF_LM CPUID bit fbd8b1819e80ac5a176d085fdddc3a34d1499318 turns off the bit for /proc/cpuinfo. However, a proper/full fix would be to additionally turn off the bit in the CPUID output so that future callers get correct CPU features info. Do that by basically reversing what the BIOS wrongfully does at boot. Signed-off-by: Borislav Petkov LKML-Reference: <1251705011-18636-3-git-send-email-petkovbb@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 63fddcd082c..0a717fc6aeb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -404,9 +404,18 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* * Some BIOSes incorrectly force this feature, but only K8 * revision D (model = 0x14) and later actually support it. + * (AMD Erratum #110, docId: 25759). */ - if (c->x86_model < 0x14) + if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { + u64 val; + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); + if (!rdmsrl_amd_safe(0xc001100d, &val)) { + val &= ~(1ULL << 32); + wrmsrl_amd_safe(0xc001100d, val); + } + } + } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); -- cgit v1.2.3 From fe9b4e4e40ffdabbd385cdf171cb861c2fd517c0 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 31 Aug 2009 11:53:23 -0700 Subject: x86, asm: Add 32-bit versions of the combined CFI macros Add 32-bit versions of the combined CFI macros, equivalent to the 64-bit ones except, obviously, operating on 32-bit stack words. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/dwarf2.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 3afc5e87cfd..ae6253ab902 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -87,9 +87,25 @@ CFI_RESTORE \reg .endm #else /*!CONFIG_X86_64*/ + .macro pushl_cfi reg + pushl \reg + CFI_ADJUST_CFA_OFFSET 4 + .endm - /* 32bit defenitions are missed yet */ + .macro popl_cfi reg + popl \reg + CFI_ADJUST_CFA_OFFSET -4 + .endm + .macro movl_cfi reg offset=0 + movl %\reg, \offset(%esp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movl_cfi_restore offset reg + movl \offset(%esp), %\reg + CFI_RESTORE \reg + .endm #endif /*!CONFIG_X86_64*/ #endif /*__ASSEMBLY__*/ -- cgit v1.2.3 From 709972b1f6f70535d1fddbe1243a51b90c408a1c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 11:57:20 -0700 Subject: x86, asm: Make _ASM_EXTABLE() usable from assembly code We have had this convenient macro _ASM_EXTABLE() to generate exception table entry in inline assembly. Make it also usable for pure assembly. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/asm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 56be78f582f..b3ed1e1460f 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,7 +3,7 @@ #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x -# define __ASM_EX_SEC .section __ex_table +# define __ASM_EX_SEC .section __ex_table, "a" #else # define __ASM_FORM(x) " " #x " " # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" @@ -38,10 +38,18 @@ #define _ASM_DI __ASM_REG(di) /* Exception table entry */ +#ifdef __ASSEMBLY__ +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC ; \ + _ASM_ALIGN ; \ + _ASM_PTR from , to ; \ + .previous +#else # define _ASM_EXTABLE(from,to) \ __ASM_EX_SEC \ _ASM_ALIGN "\n" \ _ASM_PTR #from "," #to "\n" \ " .previous\n" +#endif #endif /* _ASM_X86_ASM_H */ -- cgit v1.2.3 From 79c5dca3619d6ae15815eec14cd7a43db5f38b47 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 13:59:53 -0700 Subject: x86, msr: CFI annotations, cleanups for msr-reg.S Add CFI annotations for native_{rd,wr}msr_safe_regs(). Simplify the 64-bit implementation: we don't allow the upper half registers to be set, and so we can use them to carry state across the operation. Signed-off-by: H. 
Peter Anvin Cc: Borislav Petkov LKML-Reference: <1251705011-18636-1-git-send-email-petkovbb@gmail.com> --- arch/x86/lib/msr-reg.S | 80 ++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 51f1bb3f8c7..9e8cdcf5d73 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -12,10 +13,11 @@ */ .macro op_safe_regs op:req ENTRY(native_\op\()_safe_regs) - push %rbx - push %rbp - push $0 /* Return value */ - push %rdi + CFI_STARTPROC + pushq_cfi %rbx + pushq_cfi %rbp + movq %rdi, %r10 /* Save pointer */ + xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax movl 4(%rdi), %ecx movl 8(%rdi), %edx @@ -23,27 +25,26 @@ ENTRY(native_\op\()_safe_regs) movl 20(%rdi), %ebp movl 24(%rdi), %esi movl 28(%rdi), %edi + CFI_REMEMBER_STATE 1: \op -2: movl %edi, %r10d - pop %rdi - movl %eax, (%rdi) - movl %ecx, 4(%rdi) - movl %edx, 8(%rdi) - movl %ebx, 12(%rdi) - movl %ebp, 20(%rdi) - movl %esi, 24(%rdi) - movl %r10d, 28(%rdi) - pop %rax - pop %rbp - pop %rbx +2: movl %eax, (%r10) + movl %r11d, %eax /* Return value */ + movl %ecx, 4(%r10) + movl %edx, 8(%r10) + movl %ebx, 12(%r10) + movl %ebp, 20(%r10) + movl %esi, 24(%r10) + movl %edi, 28(%r10) + popq_cfi %rbp + popq_cfi %rbx ret 3: - movq $-EIO, 8(%rsp) + CFI_RESTORE_STATE + movl $-EIO, %r11d jmp 2b - .section __ex_table,"ax" - .balign 4 - .quad 1b, 3b - .previous + + _ASM_EXTABLE(1b, 3b) + CFI_ENDPROC ENDPROC(native_\op\()_safe_regs) .endm @@ -51,12 +52,13 @@ ENDPROC(native_\op\()_safe_regs) .macro op_safe_regs op:req ENTRY(native_\op\()_safe_regs) - push %ebx - push %ebp - push %esi - push %edi - push $0 /* Return value */ - push %eax + CFI_STARTPROC + pushl_cfi %ebx + pushl_cfi %ebp + pushl_cfi %esi + pushl_cfi %edi + pushl_cfi $0 /* Return value */ + pushl_cfi %eax movl 4(%eax), %ecx movl 8(%eax), %edx movl 12(%eax), %ebx @@ -64,30 +66,32 @@ ENTRY(native_\op\()_safe_regs) movl 24(%eax), %esi movl 28(%eax), %edi movl (%eax), %eax + CFI_REMEMBER_STATE 1: \op -2: push %eax +2: pushl_cfi %eax movl 4(%esp), %eax - pop (%eax) + popl_cfi (%eax) addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 movl %ecx, 4(%eax) movl %edx, 8(%eax) movl %ebx, 12(%eax) movl %ebp, 20(%eax) movl %esi, 24(%eax) movl %edi, 28(%eax) - pop %eax - pop %edi - pop %esi - pop %ebp - pop %ebx + popl_cfi %eax + popl_cfi %edi + popl_cfi %esi + popl_cfi %ebp + popl_cfi %ebx ret 3: + CFI_RESTORE_STATE movl $-EIO, 4(%esp) jmp 2b - .section __ex_table,"ax" - .balign 4 - .long 1b, 3b - .previous + + _ASM_EXTABLE(1b, 3b) + CFI_ENDPROC ENDPROC(native_\op\()_safe_regs) .endm -- cgit v1.2.3 From 0cc0213e73af5963eca259c84876937c20689dbd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 14:23:29 -0700 Subject: x86, msr: Have the _safe MSR functions return -EIO, not -EFAULT For some reason, the _safe MSR functions returned -EFAULT, not -EIO. However, the only user which cares about the return code as anything other than a boolean is the MSR driver, which wants -EIO. Change it to -EIO across the board. Signed-off-by: H. 
Peter Anvin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: Alok Kataria Cc: Rusty Russell --- arch/x86/include/asm/msr.h | 4 ++-- arch/x86/kernel/msr.c | 10 ++-------- arch/x86/xen/enlighten.c | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 09c5ca70d49..943fdd572e1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -67,7 +67,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EFAULT)); + : "c" (msr), [fault] "i" (-EIO)); return EAX_EDX_VAL(val, low, high); } @@ -90,7 +90,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EFAULT) + [fault] "i" (-EIO) : "memory"); return err; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 98fd6cd4e3a..2cfbb4b2c42 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -80,11 +80,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) { - if (err == -EFAULT) /* Fix idiotic error code */ - err = -EIO; + if (err) break; - } if (copy_to_user(tmp, &data, 8)) { err = -EFAULT; break; @@ -115,11 +112,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf, break; } err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) { - if (err == -EFAULT) /* Fix idiotic error code */ - err = -EIO; + if (err) break; - } tmp += 2; bytes += 8; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0a1700a2be9..a8432d81690 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -713,7 +713,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) set: base = ((u64)high << 32) | low; if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EFAULT; + ret = -EIO; break; #endif -- cgit v1.2.3 From 8b956bf1f0f2b552ed93cf6cafe823edff298b3b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 31 Aug 2009 14:13:48 -0700 Subject: x86, msr: Create _on_cpu helpers for {rw,wr}msr_safe_regs() Create _on_cpu helpers for {rw,wr}msr_safe_regs() analogously with the other MSR functions. This will be necessary to add support for these to the MSR driver. Signed-off-by: H. 
Peter Anvin Cc: Borislav Petkov --- arch/x86/include/asm/msr.h | 18 +++++++++++++---- arch/x86/lib/msr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 943fdd572e1..8e56712aa17 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -97,8 +97,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern unsigned long long native_read_tsc(void); -extern int native_rdmsr_safe_regs(u32 *regs); -extern int native_wrmsr_safe_regs(u32 *regs); +extern int native_rdmsr_safe_regs(u32 regs[8]); +extern int native_wrmsr_safe_regs(u32 regs[8]); static __always_inline unsigned long long __native_read_tsc(void) { @@ -196,12 +196,12 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) return native_wrmsr_safe_regs(gprs); } -static inline int rdmsr_safe_regs(u32 *regs) +static inline int rdmsr_safe_regs(u32 regs[8]) { return native_rdmsr_safe_regs(regs); } -static inline int wrmsr_safe_regs(u32 *regs) +static inline int wrmsr_safe_regs(u32 regs[8]) { return native_wrmsr_safe_regs(regs); } @@ -245,6 +245,8 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { @@ -275,6 +277,14 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } +static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return rdmsr_safe_regs(regs); +} +static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) +{ + return wrmsr_safe_regs(regs); +} #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index caa24aca811..33a1e3ca22d 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -175,3 +175,52 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +struct msr_regs_info { + u32 *regs; + int err; +}; + +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); -- cgit v1.2.3 From ff55df53dfdd338906c8ba9d1f4a759b86b869d5 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 31 Aug 2009 14:16:57 -0700 Subject: x86, msr: Export the register-setting MSR functions via /dev/*/msr Make it possible to access the all-register-setting/getting MSR functions via the MSR driver. This is implemented as an ioctl() on the standard MSR device node. Signed-off-by: H. Peter Anvin Cc: Borislav Petkov --- arch/x86/include/asm/msr.h | 10 +++++++-- arch/x86/kernel/msr.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 8e56712aa17..7e2b6ba962f 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -3,10 +3,16 @@ #include -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #include +#include + +#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) +#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) + +#ifdef __KERNEL__ + #include #include #include @@ -286,6 +292,6 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 2cfbb4b2c42..7dd95009417 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -1,6 +1,7 @@ /* ----------------------------------------------------------------------- * * * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved + * Copyright 2009 Intel Corporation; author: H. Peter Anvin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -121,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf, return bytes ? bytes : err; } +static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) +{ + u32 __user *uregs = (u32 __user *)arg; + u32 regs[8]; + int cpu = iminor(file->f_path.dentry->d_inode); + int err; + + switch (ioc) { + case X86_IOC_RDMSR_REGS: + if (!(file->f_mode & FMODE_READ)) { + err = -EBADF; + break; + } + if (copy_from_user(®s, uregs, sizeof regs)) { + err = -EFAULT; + break; + } + err = rdmsr_safe_regs_on_cpu(cpu, regs); + if (err) + break; + if (copy_to_user(uregs, ®s, sizeof regs)) + err = -EFAULT; + break; + + case X86_IOC_WRMSR_REGS: + if (!(file->f_mode & FMODE_WRITE)) { + err = -EBADF; + break; + } + if (copy_from_user(®s, uregs, sizeof regs)) { + err = -EFAULT; + break; + } + err = wrmsr_safe_regs_on_cpu(cpu, regs); + if (err) + break; + if (copy_to_user(uregs, ®s, sizeof regs)) + err = -EFAULT; + break; + + default: + err = -ENOTTY; + break; + } + + return err; +} + static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); @@ -151,6 +200,8 @@ static const struct file_operations msr_fops = { .read = msr_read, .write = msr_write, .open = msr_open, + .unlocked_ioctl = msr_ioctl, + .compat_ioctl = msr_ioctl, }; static int __cpuinit msr_device_create(int cpu) -- cgit v1.2.3 From 96910b6dc8a4fdb75e69f09f47b62d41743d36ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:34 +0200 Subject: locking, m68k/asm-offsets: Rename signal defines In order to be able to use asm-offsets.h in C files the existing namespace conflicts must be solved first. In asm-offsets.h there are defines for signal constants, so they can be used in assembler files. 
Unfortunately the existing defines use a 1:1 mapping for the macro names which results in name space conflicts if the header file would also be used in C files. So rename the created defines and add an "L" prefix to each one since that has already been done for the SIGTRAP define in entry_mm. Signed-off-by: Heiko Carstens Cc: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124416.998821502@de.ibm.com> Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/entry_mm.h | 1 - arch/m68k/kernel/asm-offsets.c | 8 ++++---- arch/m68k/math-emu/fp_entry.S | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h index 4fcfc28c061..47412588621 100644 --- a/arch/m68k/include/asm/entry_mm.h +++ b/arch/m68k/include/asm/entry_mm.h @@ -46,7 +46,6 @@ #define curptr a2 LFLUSH_I_AND_D = 0x00000808 -LSIGTRAP = 5 /* process bits for task_struct.ptrace */ PT_TRACESYS_OFF = 3 diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index 25d6b721522..73e5e581245 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -89,10 +89,10 @@ int main(void) DEFINE(FONT_DESC_PREF, offsetof(struct font_desc, pref)); /* signal defines */ - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(SEGV_MAPERR, SEGV_MAPERR); - DEFINE(SIGTRAP, SIGTRAP); - DEFINE(TRAP_TRACE, TRAP_TRACE); + DEFINE(LSIGSEGV, SIGSEGV); + DEFINE(LSEGV_MAPERR, SEGV_MAPERR); + DEFINE(LSIGTRAP, SIGTRAP); + DEFINE(LTRAP_TRACE, TRAP_TRACE); /* offsets into the custom struct */ DEFINE(CUSTOMBASE, &amiga_custom); diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S index 916c6f3d1f0..a3fe1f348df 100644 --- a/arch/m68k/math-emu/fp_entry.S +++ b/arch/m68k/math-emu/fp_entry.S @@ -85,8 +85,8 @@ fp_err_ua2: fp_err_ua1: addq.l #4,%sp move.l %a0,-(%sp) - pea SEGV_MAPERR - pea SIGSEGV + pea LSEGV_MAPERR + pea LSIGSEGV jsr fpemu_signal add.w #12,%sp jra ret_from_exception @@ -96,8 +96,8 @@ fp_err_ua1: | it does not really belong here, but... fp_sendtrace060: move.l (FPS_PC,%sp),-(%sp) - pea TRAP_TRACE - pea SIGTRAP + pea LTRAP_TRACE + pea LSIGTRAP jsr fpemu_signal add.w #12,%sp jra ret_from_exception -- cgit v1.2.3 From acde31dc467797ccae3a55b791a77af446cce018 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Aug 2009 14:29:20 +0100 Subject: kmemleak: Ignore the aperture memory hole on x86_64 This block is allocated with alloc_bootmem() and scanned by kmemleak but the kernel direct mapping may no longer exist. This patch tells kmemleak to ignore this memory hole. The dma32_bootmem_ptr in dma32_reserve_bootmem() is also ignored. 
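The annotation pattern is identical in both hunks and applies anywhere an early allocation may end up outside the kernel direct mapping; a minimal sketch of the usage (the variable name buf is illustrative only, size and align are as in the dma32 hunk below):

	void *buf;

	buf = __alloc_bootmem_nopanic(size, align, 512ULL << 20);
	/*
	 * Tell kmemleak neither to scan this block for pointers (its
	 * pages may not be direct-mapped) nor to report it as a leak.
	 */
	kmemleak_ignore(buf);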
Signed-off-by: Catalin Marinas Acked-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 ++++++ arch/x86/kernel/pci-dma.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc170..128111d8ffe 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) * code for safe */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(p); if (!p || __pa(p)+aper_size > 0xffffffff) { printk(KERN_ERR "Cannot allocate aperture memory hole (%p,%uK)\n", diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506..fa80f60e960 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,11 @@ void __init dma32_reserve_bootmem(void) size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(dma32_bootmem_ptr); if (dma32_bootmem_ptr) dma32_bootmem_size = size; else -- cgit v1.2.3 From f6909f394c2d4a0a71320797df72d54c49c5927e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 1 Sep 2009 13:31:52 -0700 Subject: x86, msr: fix msr-reg.S compilation with gas 2.16.1 msr-reg.S used the :req option on a macro argument, which wasn't supported by gas 2.16.1 (but apparently by some earlier versions of gas, just to be confusing.) It isn't necessary, so just remove it. Signed-off-by: H. Peter Anvin Cc: Borislav Petkov --- arch/x86/lib/msr-reg.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 9e8cdcf5d73..d5eaf53aa67 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -11,7 +11,7 @@ * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] * */ -.macro op_safe_regs op:req +.macro op_safe_regs op ENTRY(native_\op\()_safe_regs) CFI_STARTPROC pushq_cfi %rbx -- cgit v1.2.3 From e7a088f935180b90cfe6ab0aaae8a556f46885fe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 1 Sep 2009 17:54:07 -0700 Subject: sparc: convert /proc/io_map, /proc/dvma_map to seq_file Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- arch/sparc/kernel/ioport.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 87ea0d03d97..e71ce79d8c1 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -35,6 +35,7 @@ #include #include /* struct pci_dev */ #include +#include #include #include @@ -663,26 +664,33 @@ EXPORT_SYMBOL(pci_dma_sync_sg_for_device); #ifdef CONFIG_PROC_FS -static int -_sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof, - void *data) +static int sparc_io_proc_show(struct seq_file *m, void *v) { - char *p = buf, *e = buf + length; - struct resource *r; + struct resource *root = m->private, *r; const char *nm; - for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) { - if (p + 32 >= e) /* Better than nothing */ - break; + for (r = root->child; r != NULL; r = r->sibling) { if ((nm = r->name) == 0) nm = "???"; - p += sprintf(p, "%016llx-%016llx: %s\n", + seq_printf(m, "%016llx-%016llx: %s\n", (unsigned long long)r->start, (unsigned long long)r->end, nm); } - return p-buf; + return 0; } +static int sparc_io_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, sparc_io_proc_show, PDE(inode)->data); +} + +static const struct file_operations sparc_io_proc_fops = { + .owner = THIS_MODULE, + .open = sparc_io_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ /* @@ -707,7 +715,7 @@ static struct resource *_sparc_find_resource(struct resource *root, static void register_proc_sparc_ioport(void) { #ifdef CONFIG_PROC_FS - create_proc_read_entry("io_map",0,NULL,_sparc_io_get_info,&sparc_iomap); - create_proc_read_entry("dvma_map",0,NULL,_sparc_io_get_info,&_sparc_dvma); + proc_create_data("io_map", 0, NULL, &sparc_io_proc_fops, &sparc_iomap); + proc_create_data("dvma_map", 0, NULL, &sparc_io_proc_fops, &_sparc_dvma); #endif } -- cgit v1.2.3 From ae4b688db2432baad379f73fdcac13ec24f603d5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 31 Aug 2009 13:11:54 +0800 Subject: x86: Move kernel_fpu_using to irq_fpu_usable in asm/i387.h This function measures whether the FPU/SSE state can be touched in interrupt context. If the interrupted code is in user space or has no valid FPU/SSE context (CR0.TS == 1), FPU/SSE state can be used in IRQ or soft_irq context too. This is used by AES-NI accelerated AES implementation and PCLMULQDQ accelerated GHASH implementation. v3: - Renamed to irq_fpu_usable to reflect the purpose of the function. v2: - Renamed to irq_is_fpu_using to reflect the real situation. Signed-off-by: Huang Ying CC: H. Peter Anvin Signed-off-by: H. 
Peter Anvin --- arch/x86/crypto/aesni-intel_glue.c | 17 +++++------------ arch/x86/include/asm/i387.h | 8 ++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5ec1ca..7667235ade1 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -59,13 +59,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -static inline int kernel_fpu_using(void) -{ - if (in_interrupt() && !(read_cr0() & X86_CR0_TS)) - return 1; - return 0; -} - static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { unsigned long addr = (unsigned long)raw_ctx; @@ -89,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (kernel_fpu_using()) + if (irq_fpu_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -110,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (kernel_fpu_using()) + if (irq_fpu_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -123,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (kernel_fpu_using()) + if (irq_fpu_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -349,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (kernel_fpu_using()) { + if (irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); @@ -370,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (kernel_fpu_using()) { + if (irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 175adf58dd4..fb7f0d64e14 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -301,6 +301,14 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +static inline bool irq_fpu_usable(void) +{ + struct pt_regs *regs; + + return !in_interrupt() || !(regs = get_irq_regs()) || \ + user_mode(regs) || (read_cr0() & X86_CR0_TS); +} + /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions -- cgit v1.2.3 From 0fc0b732eaa38beb93a6fb62f77c7bd9622c76ec Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 2 Sep 2009 01:03:33 -0700 Subject: netdev: drivers should make ethtool_ops const No need to put ethtool_ops in data, they should be const. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- arch/um/drivers/net_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 4c75409bc09..f114813ae25 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -285,7 +285,7 @@ static void uml_net_get_drvinfo(struct net_device *dev, strcpy(info->version, "42"); } -static struct ethtool_ops uml_net_ethtool_ops = { +static const struct ethtool_ops uml_net_ethtool_ops = { .get_drvinfo = uml_net_get_drvinfo, .get_link = ethtool_op_get_link, }; -- cgit v1.2.3 From d0420c83f39f79afb82010c2d2cafd150eef651b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Sep 2009 09:14:16 +0100 Subject: KEYS: Extend TIF_NOTIFY_RESUME to (almost) all architectures [try #6] Implement TIF_NOTIFY_RESUME for most of those architectures in which isn't yet available, and, whilst we're at it, have it call the appropriate tracehook. After this patch, blackfin, m68k* and xtensa still lack support and need alteration of assembly code to make it work. Resume notification can then be used (by a later patch) to install a new session keyring on the parent of a process. Signed-off-by: David Howells Acked-by: Russell King cc: linux-arch@vger.kernel.org Signed-off-by: James Morris --- arch/alpha/include/asm/thread_info.h | 5 ++++- arch/alpha/kernel/signal.c | 5 +++++ arch/arm/include/asm/thread_info.h | 3 +++ arch/arm/kernel/entry-common.S | 2 +- arch/arm/kernel/signal.c | 5 +++++ arch/avr32/include/asm/thread_info.h | 6 +++++- arch/avr32/kernel/entry-avr32b.S | 2 +- arch/avr32/kernel/signal.c | 5 +++++ arch/cris/kernel/ptrace.c | 5 +++++ arch/h8300/include/asm/thread_info.h | 2 ++ arch/h8300/kernel/signal.c | 5 +++++ arch/m32r/include/asm/thread_info.h | 2 ++ arch/m32r/kernel/signal.c | 5 +++++ arch/mips/include/asm/thread_info.h | 2 ++ arch/mips/kernel/signal.c | 5 +++++ arch/parisc/include/asm/thread_info.h | 4 +++- arch/parisc/kernel/entry.S | 2 +- arch/parisc/kernel/signal.c | 5 +++++ 18 files changed, 64 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 60c83abfde7..5076a8860b1 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -75,6 +75,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_UAC_SIGBUS 7 #define TIF_MEMDIE 8 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<blocked, regs, syscall); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index fc42de5ca20..fd0c5d7e933 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -84,6 +84,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 6 #define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */ #define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */ +#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */ #define TIF_FREEZE 29 #define TIF_DEBUG 30 /* debugging enabled */ #define TIF_USERSPACE 31 /* true if FS sets userspace */ @@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << 
TIF_MEMDIE) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1 << TIF_FREEZE) /* Note: The masks below must never span more than 16 bits! */ @@ -103,13 +105,15 @@ static inline struct thread_info *current_thread_info(void) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ ((1 << TIF_SIGPENDING) \ + | _TIF_NOTIFY_RESUME \ | (1 << TIF_NEED_RESCHED) \ | (1 << TIF_POLLING_NRFLAG) \ | (1 << TIF_BREAKPOINT) \ | (1 << TIF_RESTORE_SIGMASK)) /* work to do on any return to userspace */ -#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE)) +#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \ + _TIF_NOTIFY_RESUME) /* work to do on return from debug mode */ #define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT)) diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S index 009a80155d6..169268c40ae 100644 --- a/arch/avr32/kernel/entry-avr32b.S +++ b/arch/avr32/kernel/entry-avr32b.S @@ -281,7 +281,7 @@ syscall_exit_work: ld.w r1, r0[TI_flags] rjmp 1b -2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME tst r1, r2 breq 3f unmask_interrupts diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 27227561bad..62d242e2d03 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -322,4 +322,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs, ¤t->blocked, syscall); + + if (ti->flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index b326023baab..4f06d7fb43d 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -36,4 +36,9 @@ void do_notify_resume(int canrestart, struct pt_regs *regs, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(canrestart,regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 8bbc8b0ee45..70e67e47d02 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void) TIF_NEED_RESCHED */ #define TIF_MEMDIE 4 #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_RESUME 6 /* callback before returning to user */ #define TIF_FREEZE 16 /* is freezing for suspend */ /* as above, but as bit values */ @@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1<) %r19, %r20, %r0 b,n intr_restore /* skip past if we've nothing to do */ diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index f82544225e8..b3bfc432670 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -645,4 +645,9 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall) if (test_thread_flag(TIF_SIGPENDING) || test_thread_flag(TIF_RESTORE_SIGMASK)) do_signal(regs, in_syscall); + + if (test_thread_flag(TIF_NOTIFY_RESUME)) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } 
} -- cgit v1.2.3 From ee18d64c1f632043a02e6f5ba5e045bb26a5465f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Sep 2009 09:14:21 +0100 Subject: KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include #include #include #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. 
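The architecture-side changes below are mechanical: every do_notify_resume() implementation grows the same deferred-replacement check, so the keyring swap happens only once the parent is on its way back to userspace. The common shape, repeated per architecture in the diff:

	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(regs);
		/* deferred from the child's KEYCTL_SESSION_TO_PARENT */
		if (current->replacement_session_keyring)
			key_replace_session_keyring();
	}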
Signed-off-by: David Howells Signed-off-by: James Morris --- arch/alpha/kernel/signal.c | 2 ++ arch/arm/kernel/signal.c | 2 ++ arch/avr32/kernel/signal.c | 2 ++ arch/cris/kernel/ptrace.c | 2 ++ arch/frv/kernel/signal.c | 2 ++ arch/h8300/kernel/signal.c | 2 ++ arch/ia64/kernel/process.c | 2 ++ arch/m32r/kernel/signal.c | 2 ++ arch/mips/kernel/signal.c | 2 ++ arch/mn10300/kernel/signal.c | 2 ++ arch/parisc/kernel/signal.c | 2 ++ arch/s390/kernel/signal.c | 2 ++ arch/sh/kernel/signal_32.c | 2 ++ arch/sh/kernel/signal_64.c | 2 ++ arch/sparc/kernel/signal_32.c | 2 ++ arch/sparc/kernel/signal_64.c | 3 +++ arch/x86/kernel/signal.c | 2 ++ 17 files changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 04e17c1f0f1..d91aaa74705 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -687,5 +687,7 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 13dec276927..ea4ad3a43c8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -711,5 +711,7 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall) if (thread_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 62d242e2d03..de9f7fe2aef 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -326,5 +326,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if (ti->flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index 4f06d7fb43d..32e9d5ee895 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -40,5 +40,7 @@ void do_notify_resume(int canrestart, struct pt_regs *regs, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 4a7a62c6e78..6b0a2b6fed6 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -572,6 +572,8 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(__frame); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } /* end do_notify_resume() */ diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 56b3ab7dbbb..abac3ee8c52 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -556,5 +556,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 5d7c0e5b9e7..89969e95004 100644 --- 
a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -192,6 +192,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(&scr->pt); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } /* copy user rbs to kernel rbs */ diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 3220258be18..f80bac17c65 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -411,6 +411,8 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } clear_thread_flag(TIF_IRET); diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index a3d1015471d..c2acf31874a 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -704,5 +704,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index feb2f2e810d..a21f43bc68e 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -568,5 +568,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(__frame); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index b3bfc432670..5ca1c02b805 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -649,5 +649,7 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 062bd64e65f..6b4fef877f9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -536,4 +536,6 @@ void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index b5afbec1db5..04a21883f32 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -640,5 +640,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 0663a0ee602..9e5c9b1d7e9 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -772,5 +772,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + 
key_replace_session_keyring(); } } diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 181d069a2d4..7ce1a1005b1 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -590,6 +590,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index ec82d76dc6f..647afbda7ae 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -613,5 +613,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } + diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94..81e58238c4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From e3e59876e82a5e1a07f365d5474e7b6943524725 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 14:02:10 +0200 Subject: x86/amd-iommu: Dump fault entry on DTE error This patch adds code to dump the content of the device table entry which caused an ILLEGAL_DEV_TABLE_ENTRY error from the IOMMU hardware. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..364c6de2637 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -138,6 +138,15 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * ****************************************************************************/ +static void dump_dte_entry(u16 devid) +{ + int i; + + for (i = 0; i < 8; ++i) + pr_err("AMD-Vi: DTE[%d]: %08x\n", i, + amd_iommu_dev_table[devid].data[i]); +} + static void iommu_print_event(void *__evt) { u32 *event = __evt; @@ -155,6 +164,7 @@ static void iommu_print_event(void *__evt) "address=0x%016llx flags=0x%04x]\n", PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); + dump_dte_entry(devid); break; case EVENT_TYPE_IO_FAULT: printk("IO_PAGE_FAULT device=%02x:%02x.%x " -- cgit v1.2.3 From 945b4ac44e5700acd3d974c176c8ace34b4d2e8e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 14:25:02 +0200 Subject: x86/amd-iommu: Dump illegal command on ILLEGAL_COMMAND_ERROR This patch adds code to dump the command which caused an ILLEGAL_COMMAND_ERROR raised by the IOMMU hardware. 
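Like dump_dte_entry() from the previous patch, which prints the eight 32-bit words of a device table entry, the new helper walks a small fixed-size array of words after converting the physical address reported in the event log back to a virtual one. A sketch of the two shapes being dumped, inferred from the dumping loops rather than quoted from amd_iommu_types.h:

	/* inferred layouts -- the real definitions live in the header */
	struct dev_table_entry {
		u32 data[8];	/* one DTE is 256 bits */
	};

	struct iommu_cmd {
		u32 data[4];	/* one command is 128 bits */
	};

The phys_to_virt() conversion in the hunk below is valid because the command buffer is allocated from direct-mapped kernel memory.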
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 364c6de2637..e62b35f5df1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -147,6 +147,15 @@ static void dump_dte_entry(u16 devid) amd_iommu_dev_table[devid].data[i]); } +static void dump_command(unsigned long phys_addr) +{ + struct iommu_cmd *cmd = phys_to_virt(phys_addr); + int i; + + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); +} + static void iommu_print_event(void *__evt) { u32 *event = __evt; @@ -186,6 +195,7 @@ static void iommu_print_event(void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: printk("COMMAND_HARDWARE_ERROR address=0x%016llx " -- cgit v1.2.3 From e394d72aa8b319211b8f947d151d9d50b0fde842 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:28:33 +0200 Subject: x86/amd-iommu: Introduce function for iommu-local domain flush This patch introduces a function to flush all domain TLBs on one given IOMMU. This is required later to reset the command buffer on one IOMMU. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 49 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e62b35f5df1..64cc582feb9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -465,38 +465,55 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) } /* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system + * This function flushes one domain on one IOMMU */ -static void iommu_flush_domain(u16 domid) +static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) { - unsigned long flags; - struct amd_iommu *iommu; struct iommu_cmd cmd; - - INC_STATS_COUNTER(domain_flush_all); + unsigned long flags; __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, domid, 1, 1); - for_each_iommu(iommu) { - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); - } + spin_lock_irqsave(&iommu->lock, flags); + __iommu_queue_command(iommu, &cmd); + __iommu_completion_wait(iommu); + __iommu_wait_for_completion(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); } -void amd_iommu_flush_all_domains(void) +static void flush_all_domains_on_iommu(struct amd_iommu *iommu) { int i; for (i = 1; i < MAX_DOMAIN_ID; ++i) { if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) continue; - iommu_flush_domain(i); + flush_domain_on_iommu(iommu, i); } + +} + +/* + * This function is used to flush the IO/TLB for a given protection domain + * on every IOMMU in the system + */ +static void iommu_flush_domain(u16 domid) +{ + struct amd_iommu *iommu; + + INC_STATS_COUNTER(domain_flush_all); + + for_each_iommu(iommu) + flush_domain_on_iommu(iommu, domid); +} + +void amd_iommu_flush_all_domains(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + flush_all_domains_on_iommu(iommu); } void amd_iommu_flush_all_devices(void) -- cgit v1.2.3 From f2430bd104bec2706315e9e983a9d9f828ff9565 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26
Aug 2009 12:10:19 +0200 Subject: x86/amd-iommu: Remove some merge helper code This patch removes some left-overs which were put into the code to simplify merging code which also depends on changes in other trees. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..70fdef54e06 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -41,9 +41,7 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); -#ifdef CONFIG_IOMMU_API static struct iommu_ops amd_iommu_ops; -#endif /* * general struct to manage commands send to an IOMMU @@ -62,10 +60,6 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); -#ifndef BUS_NOTIFY_UNBOUND_DRIVER -#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 -#endif - #ifdef CONFIG_AMD_IOMMU_STATS /* -- cgit v1.2.3 From 4c6f40d4e0f0bba77a5f27eec4e1c6d1c457d324 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 16:43:58 +0200 Subject: x86/amd-iommu: replace "AMD IOMMU" by "AMD-Vi" This patch replaces the "AMD IOMMU" printk strings with the official name for the hardware: "AMD-Vi". Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 +- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/amd_iommu_init.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a..106e1305ef8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -198,7 +198,7 @@ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...)
\ do { \ if (amd_iommu_dump) \ - printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ + printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 70fdef54e06..3e62d783652 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -141,7 +141,7 @@ static void iommu_print_event(void *__evt) int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; u64 address = (u64)(((u64)event[3]) << 32) | event[2]; - printk(KERN_ERR "AMD IOMMU: Event logged ["); + printk(KERN_ERR "AMD-Vi: Event logged ["); switch (type) { case EVENT_TYPE_ILL_DEV: diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252..169958ad624 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", + printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", dev_name(&iommu->dev->dev), iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); @@ -902,7 +902,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) r = request_irq(iommu->dev->irq, amd_iommu_int_handler, IRQF_SAMPLE_RANDOM, - "AMD IOMMU", + "AMD-Vi", NULL); if (r) { @@ -1150,7 +1150,7 @@ int __init amd_iommu_init(void) if (no_iommu) { - printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); return 0; } @@ -1248,16 +1248,16 @@ int __init amd_iommu_init(void) enable_iommus(); - printk(KERN_INFO "AMD IOMMU: device isolation "); + printk(KERN_INFO "AMD-Vi: device isolation "); if (amd_iommu_isolate) printk("enabled\n"); else printk("disabled\n"); if (amd_iommu_unmap_flush) - printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); + printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else - printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); + printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); out: return ret; -- cgit v1.2.3 From ae908c22aa2b9f7d4b41bd02d14e473f79c22dd3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 16:52:16 +0200 Subject: x86/amd-iommu: Remove redundant 'IOMMU' string The 'IOMMU: ' prefix is not necessary because the DUMP_printk macro already prints its own prefix. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 169958ad624..264b3ef9dd6 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -858,7 +858,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) switch (*p) { case ACPI_IVHD_TYPE: - DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " + DUMP_printk("device: %02x:%02x.%01x cap: %04x " "seg: %d flags: %01x info %04x\n", PCI_BUS(h->devid), PCI_SLOT(h->devid), PCI_FUNC(h->devid), h->cap_ptr, -- cgit v1.2.3 From e0faf54ee82bf9c07f0307b4391caad4020bd659 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:45:51 +0200 Subject: x86/amd-iommu: fix broken check in amd_iommu_flush_all_devices The amd_iommu_pd_table is indexed by protection domain number and not by device id. So this check is broken and must be removed. 
Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3e62d783652..009d722af00 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -479,8 +479,6 @@ void amd_iommu_flush_all_devices(void) int i; for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] == NULL) - continue; iommu = amd_iommu_rlookup_table[i]; if (!iommu) -- cgit v1.2.3 From d586d7852ccd0cecb502bf4809f827e60c486af0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:39:23 +0200 Subject: x86/amd-iommu: Add function to flush all DTEs on one IOMMU This function flushes all DTE entries on one IOMMU for all devices behind this IOMMU. This is required for command buffer resetting later. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 64cc582feb9..2dc093370d2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -516,6 +516,19 @@ void amd_iommu_flush_all_domains(void) flush_all_domains_on_iommu(iommu); } +static void flush_all_devices_for_iommu(struct amd_iommu *iommu) +{ + int i; + + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (iommu != amd_iommu_rlookup_table[i]) + continue; + + iommu_queue_inv_dev_entry(iommu, i); + iommu_completion_wait(iommu); + } +} + void amd_iommu_flush_all_devices(void) { struct amd_iommu *iommu; -- cgit v1.2.3 From 93f1cc67cf3196174412adca87321b25c1c986b0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 14:50:20 +0200 Subject: x86/amd-iommu: Add reset function for command buffers This patch factors parts of the command buffer initialization code into a separate function which can be used to reset the command buffer later. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 +++ arch/x86/kernel/amd_iommu_init.c | 20 +++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a..c54bc979dc1 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -457,4 +457,7 @@ static inline void amd_iommu_stats_init(void) { } #endif /* CONFIG_AMD_IOMMU_STATS */ +/* some function prototypes */ +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252..1752afef948 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -434,6 +434,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) return cmd_buf; } +/* + * This function resets the command buffer if the IOMMU stopped fetching + * commands from it. + */ +void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); +} + /* * This function writes the command buffer address to the hardware and * enables it.
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, &entry, sizeof(entry)); - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + amd_iommu_reset_cmd_buffer(iommu); } static void __init free_command_buffer(struct amd_iommu *iommu) -- cgit v1.2.3 From a345b23b79f1900e7d87c3165182504419180de4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:01:43 +0200 Subject: x86/amd-iommu: Reset command buffer on ILLEGAL_COMMAND_ERROR On an ILLEGAL_COMMAND_ERROR the IOMMU stops executing further commands. This patch changes the code to handle this case better by resetting the command buffer in the IOMMU. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2dc093370d2..2333d615f5e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -61,6 +61,7 @@ static u64* alloc_pte(struct protection_domain *dom, static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); +static void reset_iommu_command_buffer(struct amd_iommu *iommu); #ifndef BUS_NOTIFY_UNBOUND_DRIVER #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 @@ -156,7 +157,7 @@ static void dump_command(unsigned long phys_addr) pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); } -static void iommu_print_event(void *__evt) +static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { u32 *event = __evt; int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; @@ -195,6 +196,7 @@ static void iommu_print_event(void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + reset_iommu_command_buffer(iommu); dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: @@ -229,7 +231,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); while (head != tail) { - iommu_print_event(iommu->evt_buf + head); + iommu_print_event(iommu, iommu->evt_buf + head); head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; } @@ -547,6 +549,15 @@ void amd_iommu_flush_all_devices(void) } } +static void reset_iommu_command_buffer(struct amd_iommu *iommu) +{ + pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); + + amd_iommu_reset_cmd_buffer(iommu); + flush_all_devices_for_iommu(iommu); + flush_all_domains_on_iommu(iommu); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v1.2.3 From b26e81b871bd18184968f0bb3f12945906eadfce Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:08:09 +0200 Subject: x86/amd-iommu: Panic if IOMMU command buffer reset fails To prevent the driver from doing recursive command buffer resets, just panic when that recursion happens. 
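For illustration, the guard introduced by the hunk below follows the usual re-entrancy pattern; a minimal sketch (not the patch itself, with the actual reset work collapsed into a comment):

	if (iommu->reset_in_progress)
		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");

	iommu->reset_in_progress = true;
	/* reset and re-flush; a new ILLEGAL_COMMAND_ERROR raised in here
	 * would re-enter this function and trip the check above */
	iommu->reset_in_progress = false;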
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 +++ arch/x86/kernel/amd_iommu.c | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index c54bc979dc1..e676746a4a3 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -337,6 +337,9 @@ struct amd_iommu { /* if one, we need to send a completion wait command */ bool need_sync; + /* becomes true if a command buffer reset is running */ + bool reset_in_progress; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2333d615f5e..b62a2f64dfc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -553,9 +553,16 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu) { pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); + if (iommu->reset_in_progress) + panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + + iommu->reset_in_progress = true; + amd_iommu_reset_cmd_buffer(iommu); flush_all_devices_for_iommu(iommu); flush_all_domains_on_iommu(iommu); + + iommu->reset_in_progress = false; } /**************************************************************************** -- cgit v1.2.3 From 6a1eddd2f951656a6abbd42e2cddc2267c4a639d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 15:15:10 +0200 Subject: x86/amd-iommu: Reset command buffer if wait loop fails Instead of panicking on a completion wait loop failure, try to recover from that event by resetting the command buffer. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b62a2f64dfc..cfca80bfe75 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -318,8 +318,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) - panic("AMD IOMMU: Completion wait loop failed\n"); + if (unlikely(i == EXIT_LOOP_COUNT)) { + spin_unlock(&iommu->lock); + reset_iommu_command_buffer(iommu); + spin_lock(&iommu->lock); + } } /* -- cgit v1.2.3 From 9355a08186e52b7c120adea91c984923b54efa10 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 14:24:08 +0200 Subject: x86/amd-iommu: Make fetch_pte aware of dynamic mapping levels This patch changes the fetch_pte function in the AMD IOMMU driver to support dynamic mapping levels.
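As background for the walk, the level arithmetic of the PM_LEVEL_* macros this patch introduces can be checked in isolation; a small standalone userspace sketch (illustration only, not kernel code):

	#include <stdio.h>

	#define PM_LEVEL_SHIFT(x)    (12 + ((x) * 9))
	#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT(x)) & 0x1ffULL)

	int main(void)
	{
		unsigned long long addr = 0x0000123456789000ULL;
		int level;

		/* each level selects 9 address bits, starting at bit 12 */
		for (level = 5; level >= 0; --level)
			printf("level %d index: %llu\n",
			       level, PM_LEVEL_INDEX(level, addr));
		return 0;
	}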
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 9 +++++++++ arch/x86/kernel/amd_iommu.c | 24 +++++++++++++----------- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a..7fce4ef77bd 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -146,12 +146,21 @@ #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 #define PAGE_MODE_3_LEVEL 0x03 +#define PAGE_MODE_4_LEVEL 0x04 +#define PAGE_MODE_5_LEVEL 0x05 +#define PAGE_MODE_6_LEVEL 0x06 #define IOMMU_PDE_NL_0 0x000ULL #define IOMMU_PDE_NL_1 0x200ULL #define IOMMU_PDE_NL_2 0x400ULL #define IOMMU_PDE_NL_3 0x600ULL +#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) +#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ + ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ + (0xffffffffffffffffULL)) +#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) + #define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) #define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) #define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..29bcd358b6c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -61,6 +61,8 @@ static u64* alloc_pte(struct protection_domain *dom, static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address); #ifndef BUS_NOTIFY_UNBOUND_DRIVER #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 @@ -670,24 +672,24 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -static u64* fetch_pte(struct protection_domain *domain, +static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) { + int level; u64 *pte; - pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; + while (level > 0) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + level -= 1; - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } return pte; } -- cgit v1.2.3 From 38a76eeeafb251bf67d143a34b37a8105cff302e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 17:02:47 +0200 Subject: x86/amd-iommu: Use fetch_pte in iommu_unmap_page Instead of reimplementing existing logic use fetch_pte to walk the page table in iommu_unmap_page. 
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 29bcd358b6c..5e527989999 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -546,23 +546,10 @@ static int iommu_map_page(struct protection_domain *dom, static void iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr) { - u64 *pte; - - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + u64 *pte = fetch_pte(dom, bus_addr); - *pte = 0; + if (pte) + *pte = 0; } /* -- cgit v1.2.3 From a6d41a4027b758a9473197a78fab45afb31003aa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 17:08:55 +0200 Subject: x86/amd-iommu: Use fetch_pte in amd_iommu_iova_to_phys Don't reimplement the page table walker in this function. Use the generic one. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5e527989999..4a54366d422 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2140,21 +2140,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, phys_addr_t paddr; u64 *pte; - pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; + pte = fetch_pte(domain, iova); - if (!IOMMU_PTE_PRESENT(*pte)) - return 0; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return 0; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; - - if (!IOMMU_PTE_PRESENT(*pte)) + if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; paddr = *pte & IOMMU_PAGE_MASK; -- cgit v1.2.3 From 6a0dbcbe4e612fbc9d73cd4dde8ebef19295058a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 15:41:59 +0200 Subject: x86/amd-iommu: Add a generic version of amd_iommu_flush_all_devices This patch adds a generic variant of the amd_iommu_flush_all_devices function which flushes only the DTEs for a given protection domain.
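The intended calling convention, shown for illustration only (the small wrapper added by this patch implements exactly the NULL case):

	/* flush the DTEs of every device attached to one domain */
	flush_devices_by_domain(domain);

	/* flush the DTEs of all devices; a NULL domain acts as a wildcard */
	flush_devices_by_domain(NULL);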
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4a54366d422..5265dd17eb5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -481,13 +481,14 @@ void amd_iommu_flush_all_domains(void) } } -void amd_iommu_flush_all_devices(void) +static void flush_devices_by_domain(struct protection_domain *domain) { struct amd_iommu *iommu; int i; for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] == NULL) + if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || + (amd_iommu_pd_table[i] != domain)) continue; iommu = amd_iommu_rlookup_table[i]; @@ -499,6 +500,11 @@ void amd_iommu_flush_all_devices(void) } } +void amd_iommu_flush_all_devices(void) +{ + flush_devices_by_domain(NULL); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v1.2.3 From 407d733e30a97daf5ea6f9eb5f9ebbd42a0a9ef2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 16:07:00 +0200 Subject: x86/amd-iommu: Introduce set_dte_entry function This function factors out some logic of attach_device to a separate function. This new function will be used to update device table entries when necessary. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5265dd17eb5..0fab1f1d135 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1058,18 +1058,10 @@ static struct protection_domain *domain_for_device(u16 devid) return dom; } -/* - * If a device is not yet associated with a domain, this function does - * assigns it visible for the hardware - */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static void set_dte_entry(u16 devid, struct protection_domain *domain) { - unsigned long flags; u64 pte_root = virt_to_phys(domain->pt_root); - - domain->dev_cnt += 1; + unsigned long flags; pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; @@ -1082,6 +1074,21 @@ static void attach_device(struct amd_iommu *iommu, amd_iommu_pd_table[devid] = domain; write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ +static void attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + /* set the DTE entry */ + set_dte_entry(devid, domain); + + /* increase reference counter */ + domain->dev_cnt += 1; /* * We might boot into a crash-kernel here. The crashed kernel -- cgit v1.2.3 From 04bfdd8406099fca2e6b8844748c4d6c5eba8c8d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 16:00:23 +0200 Subject: x86/amd-iommu: Flush domains if address space size was increased This patch introduces the update_domain function which propagates the larger address space of a protection domain to the device table and flushes all relevant DTEs and the domain TLB.
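The mechanism is a plain dirty-flag pattern; in outline (names taken from this patch, code simplified):

	domain->updated = true;	/* set wherever the page table layout changes */
	...
	update_domain(domain);	/* cheap no-op unless ->updated is set */

This way the map paths can call update_domain() unconditionally and only pay for device table updates and flushes when something actually changed.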
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + arch/x86/kernel/amd_iommu.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 7fce4ef77bd..97f3d09d3be 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -235,6 +235,7 @@ struct protection_domain { int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ unsigned long flags; /* flags to find out type of domain */ + bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ void *priv; /* private data */ }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0fab1f1d135..5eab6a84b9c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -63,6 +63,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned int pages); static u64 *fetch_pte(struct protection_domain *domain, unsigned long address); +static void update_domain(struct protection_domain *domain); #ifndef BUS_NOTIFY_UNBOUND_DRIVER #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 @@ -546,6 +547,8 @@ static int iommu_map_page(struct protection_domain *dom, *pte = __pte; + update_domain(dom); + return 0; } @@ -762,9 +765,13 @@ static int alloc_new_range(struct amd_iommu *iommu, dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); } + update_domain(&dma_dom->domain); + return 0; out_free: + update_domain(&dma_dom->domain); + free_page((unsigned long)dma_dom->aperture[index]->bitmap); kfree(dma_dom->aperture[index]); @@ -1294,6 +1301,29 @@ static int get_device_resources(struct device *dev, return 1; } +static void update_device_table(struct protection_domain *domain) +{ + int i; + + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (amd_iommu_pd_table[i] != domain) + continue; + set_dte_entry(i, domain); + } +} + +static void update_domain(struct protection_domain *domain) +{ + if (!domain->updated) + return; + + update_device_table(domain); + flush_devices_by_domain(domain); + iommu_flush_domain(domain->id); + + domain->updated = false; +} + /* * If the pte_page is not yet allocated this function is called */ @@ -1351,6 +1381,8 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, } else pte += IOMMU_PTE_L0_INDEX(address); + update_domain(&dom->domain); + return pte; } -- cgit v1.2.3 From 50020fb6324465e478d6c8cdbf3c695f0a60358d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 15:38:40 +0200 Subject: x86/amd-iommu: Introduce increase_address_space function This function will be used to increase the address space size of a protection domain. 
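How much address space each paging mode covers follows directly from the level shift; a standalone userspace check (illustration only, not kernel code):

	#include <stdio.h>

	#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))

	int main(void)
	{
		int mode;

		/* mode 3 gives 39 address bits (512GB); mode 6 is treated
		 * as covering the full 64 bits, see PM_LEVEL_SIZE() */
		for (mode = 1; mode <= 6; ++mode)
			printf("mode %d: %d address bits\n",
			       mode, PM_LEVEL_SHIFT(mode));
		return 0;
	}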
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 4 ++++ arch/x86/kernel/amd_iommu.c | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 97f3d09d3be..1b4b3d6c9f0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -160,6 +160,10 @@ ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ (0xffffffffffffffffULL)) #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) +#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) +#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ + IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + #define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) #define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5eab6a84b9c..fc97b51f028 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1324,6 +1324,33 @@ static void update_domain(struct protection_domain *domain) domain->updated = false; } +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + /* * If the pte_page is not yet allocated this function is called */ -- cgit v1.2.3 From 8bc3e127421bf3b735edbde05135892c12c5f615 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 16:48:40 +0200 Subject: x86/amd-iommu: Change alloc_pte to support 64 bit address space This patch changes the alloc_pte function to be able to map pages into the whole 64 bit address space supported by AMD IOMMU hardware from the old limit of 2**39 bytes. 
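The resulting walk is a textbook allocate-on-demand radix tree descent. A hypothetical userspace model, assuming the 9-bit fan-out per level used by the hardware (names and types invented for the sketch):

	#include <stdlib.h>

	#define FANOUT 512

	struct node {
		struct node *slot[FANOUT];
	};

	/* descend from the top level to level 0, allocating missing tables */
	static struct node **walk_alloc(struct node *root,
					unsigned long long addr, int mode)
	{
		struct node *n = root;
		int level;

		for (level = mode - 1; level > 0; --level) {
			unsigned int idx = (addr >> (12 + 9 * level)) & 0x1ff;

			if (!n->slot[idx]) {
				n->slot[idx] = calloc(1, sizeof(*n));
				if (!n->slot[idx])
					return NULL;
			}
			n = n->slot[idx];
		}
		return &n->slot[(addr >> 12) & 0x1ff];
	}

	int main(void)
	{
		struct node root = { { 0 } };

		return walk_alloc(&root, 0x123456789000ULL, 4) ? 0 : 1;
	}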
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fc97b51f028..3be2b61fc31 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,7 +55,7 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); static struct dma_ops_domain *find_protection_domain(u16 devid); -static u64* alloc_pte(struct protection_domain *dom, +static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, u64 **pte_page, gfp_t gfp); static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, @@ -1351,39 +1351,35 @@ static bool increase_address_space(struct protection_domain *domain, return true; } -/* - * If the pte_page is not yet allocated this function is called - */ -static u64* alloc_pte(struct protection_domain *dom, +static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, u64 **pte_page, gfp_t gfp) { u64 *pte, *page; + int level; - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = IOMMU_L2_PDE(virt_to_phys(page)); - } + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + while (level > 0) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = IOMMU_L1_PDE(virt_to_phys(page)); - } + level -= 1; - pte = IOMMU_PTE_PAGE(*pte); + pte = IOMMU_PTE_PAGE(*pte); - if (pte_page) - *pte_page = pte; + if (pte_page && level == 0) + *pte_page = pte; - pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } return pte; } -- cgit v1.2.3 From 8c8c143cdc95ebe50fd962917556e25e8912997b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 17:30:00 +0200 Subject: x86/amd-iommu: Remove last usages of IOMMU_PTE_L0_INDEX This change allows these old macros to be removed later. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3be2b61fc31..ebc1c844392 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1402,7 +1402,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; } else - pte += IOMMU_PTE_L0_INDEX(address); + pte += PM_LEVEL_INDEX(0, address); update_domain(&dom->domain); @@ -1466,7 +1466,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (!pte) return; - pte += IOMMU_PTE_L0_INDEX(address); + pte += PM_LEVEL_INDEX(0, address); WARN_ON(!*pte); -- cgit v1.2.3 From bad1cac28a707c69301a4d0612da9ccbecd51953 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 16:52:23 +0200 Subject: x86/amd-iommu: Remove bus_addr check in iommu_map_page The driver now supports full 64 bit device address spaces.
So this check is no longer required. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ebc1c844392..6ffb3e63765 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -530,8 +530,7 @@ static int iommu_map_page(struct protection_domain *dom, bus_addr = PAGE_ALIGN(bus_addr); phys_addr = PAGE_ALIGN(phys_addr); - /* only support 512GB address spaces for now */ - if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) + if (!(prot & IOMMU_PROT_MASK)) return -EINVAL; pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); -- cgit v1.2.3 From 8f7a017ce05ed4522809448e169daa44fe6edeb1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 16:55:24 +0200 Subject: x86/amd-iommu: Use 2-level page tables for dma_ops domains The driver now supports a dynamic number of levels for IO page tables. This allows reducing the number of levels for dma_ops domains by one, because a dma_ops domain usually has an address space size between 128MB and 4G. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6ffb3e63765..addf6588c36 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1010,7 +1010,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; - dma_dom->domain.mode = PAGE_MODE_3_LEVEL; + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; dma_dom->domain.priv = dma_dom; -- cgit v1.2.3 From 674d798a80cb6ea1defa01899099f40d9124423c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2009 17:26:09 +0200 Subject: x86/amd-iommu: Remove old page table handling macros These macros are no longer required, so remove them. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1b4b3d6c9f0..d66430de5f7 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -150,11 +150,6 @@ #define PAGE_MODE_5_LEVEL 0x05 #define PAGE_MODE_6_LEVEL 0x06 -#define IOMMU_PDE_NL_0 0x000ULL -#define IOMMU_PDE_NL_1 0x200ULL -#define IOMMU_PDE_NL_2 0x400ULL -#define IOMMU_PDE_NL_3 0x600ULL - #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) #define PM_LEVEL_SIZE(x) (((x) < 6) ?
\ ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ @@ -164,15 +159,6 @@ #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) - -#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) -#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) -#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) - -#define IOMMU_MAP_SIZE_L1 (1ULL << 21) -#define IOMMU_MAP_SIZE_L2 (1ULL << 30) -#define IOMMU_MAP_SIZE_L3 (1ULL << 39) - #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) #define IOMMU_PTE_U (1ULL << 59) @@ -180,11 +166,6 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) -#define IOMMU_L1_PDE(address) \ - ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) -#define IOMMU_L2_PDE(address) \ - ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) - #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) -- cgit v1.2.3 From a6b256b41357c33ccb2d105a4457e22bdc83e021 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 12:21:31 +0200 Subject: x86/amd-iommu: Support higher level PTEs in iommu_page_unmap This patch changes fetch_pte and iommu_page_unmap to support different page sizes too. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + arch/x86/kernel/amd_iommu.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index d66430de5f7..351ca39ece0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -158,6 +158,7 @@ #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index addf6588c36..002cf9cab9e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -62,7 +62,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address); + unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); #ifndef BUS_NOTIFY_UNBOUND_DRIVER @@ -552,9 +552,9 @@ static int iommu_map_page(struct protection_domain *dom, } static void iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr) + unsigned long bus_addr, int map_size) { - u64 *pte = fetch_pte(dom, bus_addr); + u64 *pte = fetch_pte(dom, bus_addr, map_size); if (pte) *pte = 0; @@ -668,7 +668,7 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * there is one, it returns the pointer to it. 
*/ static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address) + unsigned long address, int map_size) { int level; u64 *pte; @@ -676,7 +676,7 @@ static u64 *fetch_pte(struct protection_domain *domain, level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - while (level > 0) { + while (level > map_size) { if (!IOMMU_PTE_PRESENT(*pte)) return NULL; @@ -684,6 +684,11 @@ static u64 *fetch_pte(struct protection_domain *domain, pte = IOMMU_PTE_PAGE(*pte); pte = &pte[PM_LEVEL_INDEX(level, address)]; + + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } } return pte; @@ -757,7 +762,7 @@ static int alloc_new_range(struct amd_iommu *iommu, for (i = dma_dom->aperture[index]->offset; i < dma_dom->aperture_size; i += PAGE_SIZE) { - u64 *pte = fetch_pte(&dma_dom->domain, i); + u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); if (!pte || !IOMMU_PTE_PRESENT(*pte)) continue; @@ -2192,7 +2197,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova &= PAGE_MASK; for (i = 0; i < npages; ++i) { - iommu_unmap_page(domain, iova); + iommu_unmap_page(domain, iova, PM_MAP_4k); iova += PAGE_SIZE; } @@ -2207,7 +2212,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, phys_addr_t paddr; u64 *pte; - pte = fetch_pte(domain, iova); + pte = fetch_pte(domain, iova, PM_MAP_4k); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; -- cgit v1.2.3 From abdc5eb3d69279039ba6cb89719913d08013ab14 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 Sep 2009 11:33:51 +0200 Subject: x86/amd-iommu: Change iommu_map_page to support multiple page sizes This patch adds a map_size parameter to the iommu_map_page function which makes it generic enough to handle multiple page sizes. This also requires a change to alloc_pte which is also done in this patch. 
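The new BUG_ON()s rely on PM_ALIGNED(); its behavior can be checked with the macro definitions from this patch (userspace illustration only):

	#include <stdio.h>

	#define PM_ADDR_MASK	   0x000ffffffffff000ULL
	#define PM_MAP_MASK(lvl)   (PM_ADDR_MASK & \
				    (~((1ULL << (12 + ((lvl) * 9))) - 1)))
	#define PM_ALIGNED(lvl, a) ((PM_MAP_MASK(lvl) & (a)) == (a))

	int main(void)
	{
		/* 4k alignment is enough at level 0, level 1 needs 2M alignment */
		printf("%d\n", PM_ALIGNED(0, 0x1000ULL));   /* 1 */
		printf("%d\n", PM_ALIGNED(1, 0x1000ULL));   /* 0 */
		printf("%d\n", PM_ALIGNED(1, 0x200000ULL)); /* 1 */
		return 0;
	}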
Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 6 ++++++ arch/x86/kernel/amd_iommu.c | 31 ++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 351ca39ece0..9e8bb9746dc 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -160,6 +160,12 @@ IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) +#define PM_MAP_4k 0 +#define PM_ADDR_MASK 0x000ffffffffff000ULL +#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ + (~((1ULL << (12 + ((lvl) * 9))) - 1))) +#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) + #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) #define IOMMU_PTE_U (1ULL << 59) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 002cf9cab9e..45be9499c97 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -56,8 +56,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); static struct dma_ops_domain *find_protection_domain(u16 devid); static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, u64 - **pte_page, gfp_t gfp); + unsigned long address, int end_lvl, + u64 **pte_page, gfp_t gfp); static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); @@ -523,17 +523,21 @@ void amd_iommu_flush_all_devices(void) static int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, - int prot) + int prot, + int map_size) { u64 __pte, *pte; bus_addr = PAGE_ALIGN(bus_addr); phys_addr = PAGE_ALIGN(phys_addr); + BUG_ON(!PM_ALIGNED(map_size, bus_addr)); + BUG_ON(!PM_ALIGNED(map_size, phys_addr)); + if (!(prot & IOMMU_PROT_MASK)) return -EINVAL; - pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); + pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); if (IOMMU_PTE_PRESENT(*pte)) return -EBUSY; @@ -612,7 +616,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, for (addr = e->address_start; addr < e->address_end; addr += PAGE_SIZE) { - ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, + PM_MAP_4k); if (ret) return ret; /* @@ -729,7 +734,7 @@ static int alloc_new_range(struct amd_iommu *iommu, u64 *pte, *pte_page; for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, + pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, &pte_page, gfp); if (!pte) goto out_free; @@ -1356,7 +1361,10 @@ static bool increase_address_space(struct protection_domain *domain, } static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, u64 **pte_page, gfp_t gfp) + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) { u64 *pte, *page; int level; @@ -1367,7 +1375,7 @@ static u64 *alloc_pte(struct protection_domain *domain, level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - while (level > 0) { + while (level > end_lvl) { if (!IOMMU_PTE_PRESENT(*pte)) { page = (u64 *)get_zeroed_page(gfp); if (!page) @@ -1379,7 +1387,7 @@ static u64 *alloc_pte(struct protection_domain *domain, pte = IOMMU_PTE_PAGE(*pte); - if (pte_page && level == 0) + if (pte_page && level == end_lvl) *pte_page = pte; pte = &pte[PM_LEVEL_INDEX(level, address)]; @@ -1403,7 +1411,8 @@ static u64* 
dma_ops_get_pte(struct dma_ops_domain *dom, pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; if (!pte) { - pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); + pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, + GFP_ATOMIC); aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; } else pte += PM_LEVEL_INDEX(0, address); @@ -2176,7 +2185,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, paddr &= PAGE_MASK; for (i = 0; i < npages; ++i) { - ret = iommu_map_page(domain, iova, paddr, prot); + ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); if (ret) return ret; -- cgit v1.2.3 From ac0101d396fee24994632f71b55b9f7f9ee16eff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 16:00:35 +0200 Subject: x86/dma: Mark iommu_pass_through as __read_mostly This variable is read most of the time. This patch marks it as such. It also documents the meaning of this variable while at it. Signed-off-by: Joerg Roedel --- arch/x86/kernel/pci-dma.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506..873aa079d16 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -32,7 +32,14 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -int iommu_pass_through; +/* + * This variable becomes 1 if iommu=pt is passed on the kernel command line. + * If this variable is 1, IOMMU implementations do no DMA translation for + * devices and allow every device to access to whole physical memory. This is + * useful if a user want to use an IOMMU only for KVM device assignment to + * guests and not for driver dma translation. + */ +int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); -- cgit v1.2.3 From 2650815fb03fe2bf1e6701584087ba669dcf92cd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Aug 2009 16:52:40 +0200 Subject: x86/amd-iommu: Add core functions for pd allocation/freeing This patch factors some code of protection domain allocation into separate functions. This way the logic can be used to allocate the passthrough domain later. As a side effect this patch fixes an unlikely domain id leakage bug.
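The leak was in the error path: an id obtained from domain_id_alloc() was never released when a later allocation step failed. With the factored helpers the pairing becomes explicit; sketched usage (simplified from the hunk below):

	domain = protection_domain_alloc();	/* allocates domain->id too */
	if (!domain)
		return -ENOMEM;

	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
	if (!domain->pt_root) {
		protection_domain_free(domain);	/* also frees domain->id */
		return -ENOMEM;
	}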
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..0934348abfa 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1988,19 +1988,47 @@ static void cleanup_domain(struct protection_domain *domain) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -static int amd_iommu_domain_init(struct iommu_domain *dom) +static void protection_domain_free(struct protection_domain *domain) +{ + if (!domain) + return; + + if (domain->id) + domain_id_free(domain->id); + + kfree(domain); +} + +static struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return -ENOMEM; + return NULL; spin_lock_init(&domain->lock); - domain->mode = PAGE_MODE_3_LEVEL; domain->id = domain_id_alloc(); if (!domain->id) + goto out_err; + + return domain; + +out_err: + kfree(domain); + + return NULL; +} + +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = protection_domain_alloc(); + if (!domain) goto out_free; + + domain->mode = PAGE_MODE_3_LEVEL; domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); if (!domain->pt_root) goto out_free; @@ -2010,7 +2038,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) return 0; out_free: - kfree(domain); + protection_domain_free(domain); return -ENOMEM; } -- cgit v1.2.3 From 0feae533ddebe02cda6ccce5cac7349b446776a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Aug 2009 15:26:30 +0200 Subject: x86/amd-iommu: Add passthrough mode initialization functions When iommu=pt is passed on the kernel command line, the devices should run untranslated. This requires the allocation of a special domain for that purpose. This patch implements the allocation and initialization path for iommu=pt. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/include/asm/amd_iommu_types.h | 4 ++ arch/x86/kernel/amd_iommu.c | 72 ++++++++++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f0..ac95995b7ba 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -25,6 +25,7 @@ #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a..49f7453bff7 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -143,6 +143,7 @@ #define EVT_BUFFER_SIZE 8192 /* 512 entries */ #define EVT_LEN_MASK (0x9ULL << 56) +#define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 #define PAGE_MODE_3_LEVEL 0x03 @@ -194,6 +195,9 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page + translation */ + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...)
\ do { \ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0934348abfa..7987f20499a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -41,6 +41,12 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); +/* + * Domain for untranslated devices - only allocated + * if iommu=pt passed on kernel cmd line. + */ +static struct protection_domain *pt_domain; + #ifdef CONFIG_IOMMU_API static struct iommu_ops amd_iommu_ops; #endif @@ -1067,9 +1073,9 @@ static struct protection_domain *domain_for_device(u16 devid) * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static void __attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) { unsigned long flags; u64 pte_root = virt_to_phys(domain->pt_root); @@ -1087,12 +1093,19 @@ static void attach_device(struct amd_iommu *iommu, amd_iommu_pd_table[devid] = domain; write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} - /* - * We might boot into a crash-kernel here. The crashed kernel - * left the caches in the IOMMU dirty. So we have to flush - * here to evict all dirty stuff. - */ +static void attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + __attach_device(iommu, domain, devid); + + /* + * We might boot into a crash-kernel here. The crashed kernel + * left the caches in the IOMMU dirty. So we have to flush + * here to evict all dirty stuff. + */ iommu_queue_inv_dev_entry(iommu, devid); iommu_flush_tlb_pde(iommu, domain->id); } @@ -2219,3 +2232,46 @@ static struct iommu_ops amd_iommu_ops = { .domain_has_cap = amd_iommu_domain_has_cap, }; +/***************************************************************************** + * + * The next functions do a basic initialization of IOMMU for pass through + * mode + * + * In passthrough mode the IOMMU is initialized and enabled but not used for + * DMA-API translation. + * + *****************************************************************************/ + +int __init amd_iommu_init_passthrough(void) +{ + struct pci_dev *dev = NULL; + u16 devid, devid2; + + /* allocate passthroug domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode |= PAGE_MODE_NONE; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + struct amd_iommu *iommu; + + devid = calc_devid(dev->bus->number, dev->devfn); + if (devid > amd_iommu_last_bdf) + continue; + + devid2 = amd_iommu_alias_table[devid]; + + iommu = amd_iommu_rlookup_table[devid2]; + if (!iommu) + continue; + + __attach_device(iommu, pt_domain, devid); + __attach_device(iommu, pt_domain, devid2); + } + + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); + + return 0; +} -- cgit v1.2.3 From aa879fff5d12318259816aa35023e941a1e4d3d9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 31 Aug 2009 16:01:48 +0200 Subject: x86/amd-iommu: Fix device table write order The V bit of the device table entry has to be set after the rest of the entry is written to not confuse the hardware. 
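The rule applied here: make the entry fully consistent before the bit that marks it valid becomes visible. Assuming data[0] carries the V bit, as the DTE layout suggests, the safe order is (sketch of the reordered writes from the hunk below):

	amd_iommu_dev_table[devid].data[2] = domain->id;
	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); /* V last */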
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7987f20499a..2b1e77c714f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1087,9 +1087,9 @@ static void __attach_device(struct amd_iommu *iommu, pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); -- cgit v1.2.3 From eba6ac60ba66c6bf6858938442204feaa67dea31 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 12:07:08 +0200 Subject: x86/amd-iommu: Align locking between attach_device and detach_device This patch makes the locking behavior between the functions attach_device and __attach_device consistent with the locking behavior between detach_device and __detach_device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2b1e77c714f..9aa135d4453 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1077,29 +1077,38 @@ static void __attach_device(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) { - unsigned long flags; - u64 pte_root = virt_to_phys(domain->pt_root); + u64 pte_root; - domain->dev_cnt += 1; + /* lock domain */ + spin_lock(&domain->lock); + + pte_root = virt_to_phys(domain->pt_root); pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + domain->dev_cnt += 1; + + /* ready */ + spin_unlock(&domain->lock); } static void attach_device(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) { + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); __attach_device(iommu, domain, devid); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* * We might boot into a crash-kernel here. The crashed kernel -- cgit v1.2.3 From 21129f786f231f7a9dce5b504617b893f50a435f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 11:59:42 +0200 Subject: x86/amd-iommu: Make sure a device is assigned in passthrough mode When the IOMMU driver runs in passthrough mode it has to make sure that every device not assigned to an IOMMU-API domain must be put into the passthrough domain instead of keeping it unassigned. 
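Conceptually, __detach_device() now falls back to the passthrough domain; an illustrative view of the detach path (simplified):

	__detach_device(domain, devid);
	/* in passthrough mode the device is immediately re-attached to
	 * pt_domain, so its DMA stays untranslated instead of faulting */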
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9aa135d4453..a8e74c34dd2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1141,6 +1141,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) /* ready */ spin_unlock(&domain->lock); + + /* + * If we run in passthrough mode the device must be assigned to the + * passthrough domain if it is detached from any other domain + */ + if (iommu_pass_through) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + __attach_device(iommu, pt_domain, devid); + } } /* -- cgit v1.2.3 From a1ca331c8aa75cd58fdf685e2e8745e1d3ec5c8f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 12:22:22 +0200 Subject: x86/amd-iommu: Don't detach device from pt domain on driver unbind This patch makes sure a device is not detached from the passthrough domain when the device driver is unloaded or otherwise releases the device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a8e74c34dd2..12a541deae5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1195,6 +1195,8 @@ static int device_change_notifier(struct notifier_block *nb, case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; + if (iommu_pass_through) + break; detach_device(domain, devid); break; case BUS_NOTIFY_ADD_DEVICE: -- cgit v1.2.3 From 4751a95134e05f1172131d2001c6991d671fa58c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 15:53:54 +0200 Subject: x86/amd-iommu: Initialize passthrough mode when requested This patch enables the passthrough mode for AMD IOMMU by running the initialization function when iommu=pt is passed on the kernel command line. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252..f00f489ab15 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1242,12 +1242,18 @@ int __init amd_iommu_init(void) if (ret) goto free; - ret = amd_iommu_init_dma_ops(); + if (iommu_pass_through) + ret = amd_iommu_init_passthrough(); + else + ret = amd_iommu_init_dma_ops(); if (ret) goto free; enable_iommus(); + if (iommu_pass_through) + goto out; + printk(KERN_INFO "AMD IOMMU: device isolation "); if (amd_iommu_isolate) printk("enabled\n"); -- cgit v1.2.3 From 6ac162d6c01ac7626f46c68c0770556cf682ce34 Mon Sep 17 00:00:00 2001 From: Pavel Vasilyev Date: Thu, 3 Sep 2009 16:20:55 +0200 Subject: x86/gart: Do not select AGP for GART_IOMMU There is no dependency from the gart code on the agp code. And since a lot of systems today do not have agp anymore, remove this dependency from the kernel configuration.
Signed-off-by: Pavel Vasilyev Signed-off-by: Joerg Roedel --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5df37d..1d9c18aa17e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -586,7 +586,6 @@ config GART_IOMMU bool "GART IOMMU support" if EMBEDDED default y select SWIOTLB - select AGP depends on X86_64 && PCI ---help--- Support for full DMA access of devices with 32bit memory access only -- cgit v1.2.3 From 8adf65cfae2d6c2ec5c06e4521f089c62f9eff05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 21:26:34 +0200 Subject: x86, msr: Fix msr-reg.S compilation with gas 2.16.1, on 32-bit too The macro was defined in the 32-bit path as well - breaking the build on 32-bit platforms: arch/x86/lib/msr-reg.S: Assembler messages: arch/x86/lib/msr-reg.S:53: Error: Bad macro parameter list arch/x86/lib/msr-reg.S:100: Error: invalid character '_' in mnemonic arch/x86/lib/msr-reg.S:101: Error: invalid character '_' in mnemonic Cc: Borislav Petkov Cc: H. Peter Anvin LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/lib/msr-reg.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index d5eaf53aa67..69fa10623f2 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -50,7 +50,7 @@ ENDPROC(native_\op\()_safe_regs) #else /* X86_32 */ -.macro op_safe_regs op:req +.macro op_safe_regs op ENTRY(native_\op\()_safe_regs) CFI_STARTPROC pushl_cfi %ebx -- cgit v1.2.3 From 1ea0d14e480c245683927eecc03a70faf06e80c8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 3 Sep 2009 12:27:15 -0700 Subject: x86/i386: Make sure stack-protector segment base is cache aligned The Intel Optimization Reference Guide says: In Intel Atom microarchitecture, the address generation unit assumes that the segment base will be 0 by default. Non-zero segment base will cause load and store operations to experience a delay. - If the segment base isn't aligned to a cache line boundary, the max throughput of memory operations is reduced to one [e]very 9 cycles. [...] Assembly/Compiler Coding Rule 15. (H impact, ML generality) For Intel Atom processors, use segments with base set to 0 whenever possible; avoid non-zero segment base address that is not aligned to cache line boundary at all cost. We can't avoid having a non-zero base for the stack-protector segment, but we can make it cache-aligned. Signed-off-by: Jeremy Fitzhardinge Cc: LKML-Reference: <4AA01893.6000507@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 12 +++++++++++- arch/x86/include/asm/stackprotector.h | 4 ++-- arch/x86/include/asm/system.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head_32.S | 1 - 5 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7768269b1c..e597ecc8753 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,7 +403,17 @@ extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR -DECLARE_PER_CPU(unsigned long, stack_canary); +/* + * Make sure stack canary segment base is cached-aligned: + * "For Intel Atom processors, avoid non zero segment base address + * that is not aligned to cache line boundary at all cost." + * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) 
+ */ +struct stack_canary { + char __pad[20]; /* canary at %gs:20 */ + unsigned long canary; +}; +DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; #endif #endif /* X86_64 */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 44efdff3975..15751776356 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -78,14 +78,14 @@ static __always_inline void boot_init_stack_canary(void) #ifdef CONFIG_X86_64 percpu_write(irq_stack_union.stack_canary, canary); #else - percpu_write(stack_canary, canary); + percpu_write(stack_canary.canary, canary); #endif } static inline void setup_stack_canary_segment(int cpu) { #ifdef CONFIG_X86_32 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); struct desc_struct desc; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 75c49c782e2..f08f9737489 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, "movl %P[task_canary](%[next]), %%ebx\n\t" \ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ - , [stack_canary] "=m" (per_cpu_var(stack_canary)) + , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ced07ba5e93..7d84bc4c118 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); #else /* CONFIG_X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR -DEFINE_PER_CPU(unsigned long, stack_canary); +DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; #endif /* Make sure %fs and %gs are initialized properly in idle threads */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index cc827ac9e8d..7ffec6b3b33 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP jne 1f movl $per_cpu__gdt_page,%eax movl $per_cpu__stack_canary,%ecx - subl $20, %ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) -- cgit v1.2.3 From 4a376ec3a2599c02207cd4cbd5dbf73783548463 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 3 Sep 2009 09:40:21 +0200 Subject: x86: Fix CPU llc_shared_map information for AMD Magny-Cours Construct entire NodeID and use it as cpu_llc_id. Thus internal node siblings are stored in llc_shared_map. Signed-off-by: Andreas Herrmann Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/amd.c | 64 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4a28d22d479..847fee6493a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -95,6 +95,7 @@ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0a717fc6aeb..a76d2c18f15 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -250,6 +250,64 @@ static int __cpuinit nearby_node(int apicid) } #endif +/* + * Fixup core topology information for AMD multi-node processors. + * Assumption 1: Number of cores in each internal node is the same. + * Assumption 2: Mixed systems with both single-node and dual-node + * processors are not supported. + */ +#ifdef CONFIG_X86_HT +static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_PCI + u32 t, cpn; + u8 n, n_id; + int cpu = smp_processor_id(); + + /* fixup topology information only once for a core */ + if (cpu_has(c, X86_FEATURE_AMD_DCM)) + return; + + /* check for multi-node processor on boot cpu */ + t = read_pci_config(0, 24, 3, 0xe8); + if (!(t & (1 << 29))) + return; + + set_cpu_cap(c, X86_FEATURE_AMD_DCM); + + /* cores per node: each internal node has half the number of cores */ + cpn = c->x86_max_cores >> 1; + + /* even-numbered NB_id of this dual-node processor */ + n = c->phys_proc_id << 1; + + /* + * determine internal node id and assign cores fifty-fifty to + * each node of the dual-node processor + */ + t = read_pci_config(0, 24 + n, 3, 0xe8); + n = (t>>30) & 0x3; + if (n == 0) { + if (c->cpu_core_id < cpn) + n_id = 0; + else + n_id = 1; + } else { + if (c->cpu_core_id < cpn) + n_id = 1; + else + n_id = 0; + } + + /* compute entire NodeID, use llc_shared_map to store sibling info */ + per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; + + /* fixup core id to be in range from 0 to cpn */ + c->cpu_core_id = c->cpu_core_id % cpn; +#endif +} +#endif + /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. * Assumes number of cores is a power of two. @@ -267,6 +325,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + /* fixup topology information on multi-node processors */ + if ((c->x86 == 0x10) && (c->x86_model == 9)) + amd_fixup_dcm(c); #endif } @@ -277,7 +338,8 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) int node; unsigned apicid = cpu_has_apic ? 
hard_smp_processor_id() : c->apicid; - node = c->phys_proc_id; + node = per_cpu(cpu_llc_id, cpu); + if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; if (!node_online(node)) { -- cgit v1.2.3 From a326e948c538e8ce998f30d92e146ecea8a30421 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 3 Sep 2009 09:41:19 +0200 Subject: x86, cacheinfo: Fixup L3 cache information for AMD multi-node processors L3 cache size, associativity and shared_cpu information need to be adapted to show information for an internal node instead of the entire physical package. Signed-off-by: Andreas Herrmann Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel_cacheinfo.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 789efe217e1..cb98a73e516 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, case 0: if (!l1->val) return; - assoc = l1->assoc; + assoc = assocs[l1->assoc]; line_size = l1->line_size; lines_per_tag = l1->lines_per_tag; size_in_kb = l1->size_in_kb; @@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, case 2: if (!l2.val) return; - assoc = l2.assoc; + assoc = assocs[l2.assoc]; line_size = l2.line_size; lines_per_tag = l2.lines_per_tag; /* cpu_data has errata corrections for K7 applied */ @@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, case 3: if (!l3.val) return; - assoc = l3.assoc; + assoc = assocs[l3.assoc]; line_size = l3.line_size; lines_per_tag = l3.lines_per_tag; size_in_kb = l3.size_encoded * 512; + if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { + size_in_kb = size_in_kb >> 1; + assoc = assoc >> 1; + } break; default: return; @@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.is_self_initializing = 1; eax->split.type = types[leaf]; eax->split.level = levels[leaf]; - if (leaf == 3) - eax->split.num_threads_sharing = - current_cpu_data.x86_max_cores - 1; - else - eax->split.num_threads_sharing = 0; + eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; - if (assoc == 0xf) + if (assoc == 0xffff) eax->split.is_fully_associative = 1; ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.ways_of_associativity = assoc - 1; ebx->split.physical_line_partition = lines_per_tag - 1; ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / (ebx->split.ways_of_associativity + 1) - 1; @@ -522,6 +522,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) int index_msb, i; struct cpuinfo_x86 *c = &cpu_data(cpu); + if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { + struct cpuinfo_x86 *d; + for_each_online_cpu(i) { + if (!per_cpu(cpuid4_info, i)) + continue; + d = &cpu_data(i); + this_leaf = CPUID4_INFO_IDX(i, index); + cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), + d->llc_shared_map); + } + return; + } this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; -- cgit v1.2.3 From cb9805ab5b97079f69a21b6b4e344a69d5c96157 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 3 Sep 2009 09:42:20 +0200 Subject: x86, mcheck: Use correct cpumask for shared bank4 This fixes threshold_bank4 support on multi-node processors. 
The correct mask to use is llc_shared_map, representing an internal node on Magny-Cours. We need to create 2 sets of symlinks for sibling shared banks -- one set for each internal node; symlinks of each set should target the first core on the same internal node. Currently only one set is created, where all symlinks target the first core of the entire socket. Signed-off-by: Andreas Herrmann Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bd..1fecba404fd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) int i, err = 0; struct threshold_bank *b = NULL; char name[32]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + sprintf(name, "threshold_bank%i", bank); #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = cpumask_first(cpu_core_mask(cpu)); + i = cpumask_first(c->llc_shared_map); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - cpumask_copy(b->cpus, cpu_core_mask(cpu)); + cpumask_copy(b->cpus, c->llc_shared_map); per_cpu(threshold_banks, cpu)[bank] = b; goto out; @@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP cpumask_setall(b->cpus); #else - cpumask_copy(b->cpus, cpu_core_mask(cpu)); + cpumask_copy(b->cpus, c->llc_shared_map); #endif per_cpu(threshold_banks, cpu)[bank] = b; -- cgit v1.2.3 From 5a925b4282d7f805deafde62001a83dbaf8be275 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 3 Sep 2009 09:44:28 +0200 Subject: x86, sched: Workaround broken sched domain creation for AMD Magny-Cours Current sched domain creation code can't handle multi-node processors. When switching to power_savings scheduling, errors show up and the system might hang later on (due to a broken sched domain hierarchy): # echo 0 >> /sys/devices/system/cpu/sched_mc_power_savings CPU0 attaching sched-domain: domain 0: span 0-5 level MC groups: 0 1 2 3 4 5 domain 1: span 0-23 level NODE groups: 0-5 6-11 18-23 12-17 ... # echo 1 >> /sys/devices/system/cpu/sched_mc_power_savings CPU0 attaching sched-domain: domain 0: span 0-11 level MC groups: 0 1 2 3 4 5 6 7 8 9 10 11 ERROR: parent span is not a superset of domain->span domain 1: span 0-5 level CPU ERROR: domain->groups does not contain CPU0 groups: 6-11 (__cpu_power = 12288) ERROR: groups don't span domain->span domain 2: span 0-23 level NODE groups: ERROR: domain->cpu_power not set ERROR: groups don't span domain->span ... Fixing all aspects of power-savings scheduling for Magny-Cours needs some larger changes in the sched domain creation code. As a short-term and temporary workaround, avoid the problems by extending "the worst possible hack" ;-( and always using llc_shared_map on AMD Magny-Cours when the MC domain span is calculated. With this I get: # echo 1 >> /sys/devices/system/cpu/sched_mc_power_savings CPU0 attaching sched-domain: domain 0: span 0-5 level MC groups: 0 1 2 3 4 5 domain 1: span 0-5 level CPU groups: 0-5 (__cpu_power = 6144) domain 2: span 0-23 level NODE groups: 0-5 (__cpu_power = 6144) 6-11 (__cpu_power = 6144) 18-23 (__cpu_power = 6144) 12-17 (__cpu_power = 6144) ... I.e.
no errors during sched domain creation, no system hangs, and also mc_power_savings scheduling works to a certain extent. Cc: Peter Zijlstra Signed-off-by: Andreas Herrmann Signed-off-by: H. Peter Anvin --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee6..c36cc1452cd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -434,7 +434,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map */ - if (sched_mc_power_savings || sched_smt_power_savings) + if ((sched_mc_power_savings || sched_smt_power_savings) && + !(cpu_has(c, X86_FEATURE_AMD_DCM))) return cpu_core_mask(cpu); else return c->llc_shared_map; -- cgit v1.2.3 From 53f824520b6d84ca5b4a8fd71addc91dbf64357e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 3 Sep 2009 14:31:44 -0700 Subject: x86/i386: Put aligned stack-canary in percpu shared_aligned section Pack aligned things together into a special section to minimize padding holes. Suggested-by: Eric Dumazet Signed-off-by: Jeremy Fitzhardinge Cc: Tejun Heo LKML-Reference: <4AA035C0.9070202@goop.org> [ queued up in tip:x86/asm because it depends on this commit: x86/i386: Make sure stack-protector segment base is cache aligned ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e597ecc8753..ac7e79654f3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -413,7 +413,7 @@ struct stack_canary { char __pad[20]; /* canary at %gs:20 */ unsigned long canary; }; -DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; +DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif #endif /* X86_64 */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d84bc4c118..f23e236391a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); #else /* CONFIG_X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR -DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; +DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif /* Make sure %fs and %gs are initialized properly in idle threads */ -- cgit v1.2.3 From 747b50aaf728987732e6ff3ba10aba4acc4e0277 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:46 +0200 Subject: x86, perf_counter, bts: Fail if BTS is not available Reserve PERF_COUNT_HW_BRANCH_INSTRUCTIONS with sample_period == 1 for BTS tracing and fail if BTS is not available.
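A userspace sketch, assuming the 2.6.31-era perf_counter ABI, of the request that now fails cleanly instead of silently falling back: a hardware branch-instructions counter opened with sample_period == 1 is treated as a BTS request.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

/* Hypothetical helper, not from the patch: returns a counter fd, or -1
 * with errno == EOPNOTSUPP when BTS cannot be reserved. */
static int open_bts_counter(void)
{
        struct perf_counter_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size          = sizeof(attr);
        attr.type          = PERF_TYPE_HARDWARE;
        attr.config        = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
        attr.sample_period = 1;        /* period 1 selects BTS */

        return syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
}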
Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140612.943801000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 396e35db705..2f41874ffb8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -769,7 +769,7 @@ static int reserve_bts_hardware(void) int cpu, err = 0; if (!bts_available()) - return -EOPNOTSUPP; + return 0; get_online_cpus(); @@ -914,7 +914,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (!reserve_pmc_hardware()) err = -EBUSY; else - reserve_bts_hardware(); + err = reserve_bts_hardware(); } if (!err) atomic_inc(&active_counters); @@ -979,6 +979,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (config == -1LL) return -EINVAL; + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1) && !bts_available()) + return -EOPNOTSUPP; + hwc->config |= config; return 0; @@ -1355,19 +1362,9 @@ static int x86_pmu_enable(struct perf_counter *counter) idx = fixed_mode_idx(counter, hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { - /* - * Try to use BTS for branch tracing. If that is not - * available, try to get a generic counter. - */ - if (unlikely(!cpuc->ds)) - goto try_generic; - - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ + /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; + return -EAGAIN; hwc->config_base = 0; hwc->counter_base = 0; -- cgit v1.2.3 From 596da17f94c103348ebe04129c00d536ea0e80e2 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:47 +0200 Subject: x86, perf_counter, bts: Correct pointer-to-u64 casts On 32bit, pointers in the DS AREA configuration are cast to u64. The current (long) cast to avoid compiler warnings results in a signed 64bit address. 
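A standalone sketch of the bug being fixed: on a 32-bit build, casting through a signed type sign-extends the pointer's high bit into the upper half of the u64, while an unsigned cast zero-extends. The address value below is illustrative only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t ptr = 0xc1000000u;     /* a typical 32-bit kernel address */

        uint64_t wrong = (uint64_t)(int32_t)ptr;   /* like (long): sign-extends  */
        uint64_t right = (uint64_t)(uint32_t)ptr;  /* like (unsigned long): zero-extends */

        printf("%016llx\n", (unsigned long long)wrong); /* ffffffffc1000000 */
        printf("%016llx\n", (unsigned long long)right); /* 00000000c1000000 */
        return 0;
}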
Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140615.305889000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2f41874ffb8..3776b0b630c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -726,7 +726,8 @@ static inline void init_debug_store_on_cpu(int cpu) return; wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); } static inline void fini_debug_store_on_cpu(int cpu) @@ -757,7 +758,7 @@ static void release_bts_hardware(void) per_cpu(cpu_hw_counters, cpu).ds = NULL; - kfree((void *)(long)ds->bts_buffer_base); + kfree((void *)(unsigned long)ds->bts_buffer_base); kfree(ds); } @@ -788,7 +789,7 @@ static int reserve_bts_hardware(void) break; } - ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_buffer_base = (u64)(unsigned long)buffer; ds->bts_index = ds->bts_buffer_base; ds->bts_absolute_maximum = ds->bts_buffer_base + BTS_BUFFER_SIZE; @@ -1491,7 +1492,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, }; struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; unsigned long orig_ip = data->regs->ip; - u64 at; + struct bts_record *at, *top; if (!counter) return; @@ -1499,19 +1500,18 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, if (!ds) return; - for (at = ds->bts_buffer_base; - at < ds->bts_index; - at += sizeof(struct bts_record)) { - struct bts_record *rec = (struct bts_record *)(long)at; + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; - data->regs->ip = rec->from; - data->addr = rec->to; + ds->bts_index = ds->bts_buffer_base; + + for (; at < top; at++) { + data->regs->ip = at->from; + data->addr = at->to; perf_counter_output(counter, 1, data); } - ds->bts_index = ds->bts_buffer_base; - data->regs->ip = orig_ip; data->addr = 0; -- cgit v1.2.3 From 1653192f510bd8114b7b133d7289e6e5c3e95046 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:48 +0200 Subject: x86, perf_counter, bts: Do not allow kernel BTS tracing for now Kernel BTS tracing generates too much data too fast for us to handle, causing the kernel to hang. Fail for BTS requests for kernel code. Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140616.901253000@intel.com> [ This is really a workaround - but we want BTS tracing in .32 so make sure we dont regress. The lockup should be fixed ASAP. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3776b0b630c..f9cd0849bd4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -984,8 +984,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * Branch tracing: */ if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1) && !bts_available()) - return -EOPNOTSUPP; + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. 
*/ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } hwc->config |= config; -- cgit v1.2.3 From 0d96b9ff748b5f57d6f1d6d21209f5745245aadc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 29 Aug 2009 13:17:14 -0700 Subject: x86: Use hard_smp_processor_id() to get apic id for AMD K8 cpus Otherwise, systems with apic id lifting will have the wrong apicid in /proc/cpuinfo. Also use that in srat_detect_node(). Signed-off-by: Yinghai Lu Cc: Andreas Herrmann Cc: Suresh Siddha Cc: Cyrill Gorcunov LKML-Reference: <4A998CCA.1040407@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a76d2c18f15..e1600c775e1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -336,7 +336,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; - unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; + unsigned apicid = c->apicid; node = per_cpu(cpu_llc_id, cpu); @@ -481,6 +481,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); #else /* -- cgit v1.2.3 From 47734f89be0614b5acbd6a532390f9c72f019648 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Sep 2009 11:21:24 +0200 Subject: sched: Clean up topology.h Re-organize the flag settings so that it's visible at a glance which sched-domains flags are set and which are not.
With the new balancer code we'll need to re-tune these details anyway, so make it cleaner to make fewer mistakes down the road ;-) Cc: Peter Zijlstra Cc: Andreas Herrmann Cc: Andreas Herrmann Cc: Gautham R Shenoy Cc: Balbir Singh LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 47 ++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 066ef590d7e..be29eb81fb0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,25 +129,34 @@ extern unsigned long node_remap_size[]; #endif /* sched_domains SD_NODE_INIT for NUMA machines */ -#define SD_NODE_INIT (struct sched_domain) { \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = SD_CACHE_NICE_TRIES, \ - .busy_idx = 3, \ - .idle_idx = SD_IDLE_IDX, \ - .newidle_idx = SD_NEWIDLE_IDX, \ - .wake_idx = 1, \ - .forkexec_idx = SD_FORKEXEC_IDX, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE \ - | SD_SERIALIZE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ +#define SD_NODE_INIT (struct sched_domain) { \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = SD_CACHE_NICE_TRIES, \ + .busy_idx = 3, \ + .idle_idx = SD_IDLE_IDX, \ + .newidle_idx = SD_NEWIDLE_IDX, \ + .wake_idx = 1, \ + .forkexec_idx = SD_FORKEXEC_IDX, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 0*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_WAKE_IDLE \ + | 1*SD_WAKE_AFFINE \ + | 1*SD_WAKE_BALANCE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_POWERSAVINGS_BALANCE \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SERIALIZE \ + | 0*SD_WAKE_IDLE_FAR \ + | 0*SD_PREFER_SIBLING \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ } #ifdef CONFIG_X86_64_ACPI_NUMA -- cgit v1.2.3 From 840a0653100dbde599ae8ddf83fa214dfa5fd1aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 4 Sep 2009 11:32:54 +0200 Subject: sched: Turn on SD_BALANCE_NEWIDLE Start the re-tuning of the balancer by turning on newidle. It improves hackbench performance and parallelism on a 4x4 box. The "perf stat --repeat 10" measurements give us: domain0 domain1 ....................................... 
-SD_BALANCE_NEWIDLE -SD_BALANCE_NEWIDLE: 2041.273208 task-clock-msecs # 9.354 CPUs ( +- 0.363% ) +SD_BALANCE_NEWIDLE -SD_BALANCE_NEWIDLE: 2086.326925 task-clock-msecs # 11.934 CPUs ( +- 0.301% ) +SD_BALANCE_NEWIDLE +SD_BALANCE_NEWIDLE: 2115.289791 task-clock-msecs # 12.158 CPUs ( +- 0.263% ) Acked-by: Peter Zijlstra Cc: Andreas Herrmann Cc: Andreas Herrmann Cc: Gautham R Shenoy Cc: Balbir Singh LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index be29eb81fb0..ef7bc7fc252 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -142,7 +142,7 @@ extern unsigned long node_remap_size[]; .forkexec_idx = SD_FORKEXEC_IDX, \ \ .flags = 1*SD_LOAD_BALANCE \ - | 0*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ | 0*SD_WAKE_IDLE \ -- cgit v1.2.3 From 825c9fb47a0837db12fecf8d360e0e1d284ddb49 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Sep 2009 02:56:22 -0700 Subject: sparc: add basic support for 'perf' This wires up the perf_counter_open() syscall so that basic software support for perf is working. Signed-off-by: Jens Axboe Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 2 ++ arch/sparc/include/asm/perf_counter.h | 8 ++++++++ arch/sparc/include/asm/unistd.h | 3 ++- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 ++-- 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 arch/sparc/include/asm/perf_counter.h (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b847f880eec..2aa7cd39b48 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,7 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 + select HAVE_PERF_COUNTERS config SPARC32 def_bool !64BIT @@ -44,6 +45,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE + select HAVE_PERF_COUNTERS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h new file mode 100644 index 00000000000..f07c587a830 --- /dev/null +++ b/arch/sparc/include/asm/perf_counter.h @@ -0,0 +1,8 @@ +#ifndef __ASM_SPARC_PERF_COUNTER_H +#define __ASM_SPARC_PERF_COUNTER_H + +static inline void set_perf_counter_pending(void) { } + +#define PERF_COUNTER_INDEX_OFFSET 0 + +#endif diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index b2c406de7d4..706df669f3b 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -395,8 +395,9 @@ #define __NR_preadv 324 #define __NR_pwritev 325 #define __NR_rt_tgsigqueueinfo 326 +#define __NR_perf_counter_open 327 -#define NR_SYSCALLS 327 +#define NR_SYSCALLS 328 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 69090165729..04181577cb6 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo +/*325*/ .long 
sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 2ee7250ba7a..91b06b7f7ac 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_counter_open #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open -- cgit v1.2.3 From 8e019366ba749a536131cde1947af6dcaccf8e8f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 27 Aug 2009 14:50:00 +0100 Subject: kmemleak: Don't scan uninitialized memory when kmemcheck is enabled Ingo Molnar reported the following kmemcheck warning when running both kmemleak and kmemcheck enabled: PM: Adding info for No Bus:vcsa7 WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (f6f6e1a4) d873f9f600000000c42ae4c1005c87f70000000070665f666978656400000000 i i i i u u u u i i i i i i i i i i i i i i i i i i i i i u u u ^ Pid: 3091, comm: kmemleak Not tainted (2.6.31-rc7-tip #1303) P4DC6 EIP: 0060:[] EFLAGS: 00010006 CPU: 0 EIP is at scan_block+0x3f/0xe0 EAX: f40bd700 EBX: f40bd780 ECX: f16b46c0 EDX: 00000001 ESI: f6f6e1a4 EDI: 00000000 EBP: f10f3f4c ESP: c2605fcc DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: e89a4844 CR3: 30ff1000 CR4: 000006f0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff4ff0 DR7: 00000400 [] scan_object+0x7c/0xf0 [] kmemleak_scan+0x1d9/0x400 [] kmemleak_scan_thread+0x4c/0xb0 [] kthread+0x74/0x80 [] kernel_thread_helper+0x7/0x3c [] 0xffffffff kmemleak: 515 new suspected memory leaks (see /sys/kernel/debug/kmemleak) kmemleak: 42 new suspected memory leaks (see /sys/kernel/debug/kmemleak) The problem here is that kmemleak will scan partially initialized objects, which makes kmemcheck complain. Fix that up by skipping uninitialized memory regions when kmemcheck is enabled.
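A sketch of the caller side, assuming kmemleak's scan loop is the consumer of the new helper (the kmemleak half is not part of this patch):

/* Not the actual kmemleak code -- illustrative only. */
static void scan_block(unsigned long *start, unsigned long *end)
{
        unsigned long *ptr;

        for (ptr = start; ptr < end; ptr++) {
                /* Skip words kmemcheck still tracks as uninitialized,
                 * so scanning no longer triggers kmemcheck warnings. */
                if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
                                                  sizeof(*ptr)))
                        continue;
                /* ... treat *ptr as a candidate pointer ... */
        }
}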
Reported-by: Ingo Molnar Acked-by: Ingo Molnar Acked-by: Catalin Marinas Signed-off-by: Pekka Enberg --- arch/x86/mm/kmemcheck/kmemcheck.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 2c55ed09865..528bf954eb7 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs, kmemcheck_shadow_set(shadow, size); } +bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) +{ + enum kmemcheck_shadow status; + void *shadow; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return true; + + status = kmemcheck_shadow_test(shadow, size); + + return status == KMEMCHECK_SHADOW_INITIALIZED; +} + /* Access may cross page boundary */ static void kmemcheck_read(struct pt_regs *regs, unsigned long addr, unsigned int size) -- cgit v1.2.3 From b19ae3999891cad21a3995c34d313dda5df014e2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 4 Sep 2009 10:00:09 -0700 Subject: x86, msr: change msr-reg.o to obj-y, and export its symbols Change msr-reg.o to obj-y (it will be included in virtually every kernel since it is used by the initialization code for AMD processors) and add a separate C file to export its symbols to modules, so that msr.ko can use them; on uniprocessors we bypass the helper functions in msr.o and use the accessor functions directly via inlines. Signed-off-by: H. Peter Anvin LKML-Reference: <20090904140834.GA15789@elte.hu> Cc: Borislav Petkov --- arch/x86/lib/Makefile | 3 ++- arch/x86/lib/msr-reg-export.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 arch/x86/lib/msr-reg-export.c (limited to 'arch') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index b59c0647d80..9e609206fac 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -8,7 +8,8 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-y += msr-reg.o + +obj-y += msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c new file mode 100644 index 00000000000..a311cc59b65 --- /dev/null +++ b/arch/x86/lib/msr-reg-export.c @@ -0,0 +1,5 @@ +#include +#include + +EXPORT_SYMBOL(native_rdmsr_safe_regs); +EXPORT_SYMBOL(native_wrmsr_safe_regs); -- cgit v1.2.3 From 81bd5f6c966cf2f137c2759dfc78abdffcff055e Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sat, 5 Sep 2009 16:27:35 +1000 Subject: crypto: sha-s390 - Fix warnings in import function This patch fixes the warnings.
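A compile-time sketch of the mismatch being fixed, with illustrative names rather than the real crypto API: assigning a function whose parameter is const u8 * to a hook declared const void * is an incompatible-pointer-type warning, so the fix takes const void * and converts inside the body.

/* Illustrative hook table shaped like the shash import callback. */
struct alg_ops {
        int (*import)(void *desc, const void *in);
};

/* Matches the hook's prototype exactly; the conversion to the concrete
 * state type happens in the body instead of in the prototype. */
static int sha_import(void *desc, const void *in)
{
        const unsigned char *state = in;

        (void)desc;
        (void)state;
        return 0;
}

static const struct alg_ops ops = { .import = sha_import };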
Signed-off-by: Jan Glauber Signed-off-by: Herbert Xu --- arch/s390/crypto/sha1_s390.c | 4 ++-- arch/s390/crypto/sha256_s390.c | 4 ++-- arch/s390/crypto/sha512_s390.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 4a943789c20..f6de7826c97 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -57,10 +57,10 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const u8 *in) +static int sha1_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha1_state *ictx = in; + const struct sha1_state *ictx = in; sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 2bab5197789..61a7db37212 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -53,10 +53,10 @@ static int sha256_export(struct shash_desc *desc, void *out) return 0; } -static int sha256_import(struct shash_desc *desc, const u8 *in) +static int sha256_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha256_state *ictx = in; + const struct sha256_state *ictx = in; sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index b4b3438ae77..4bf73d0dc52 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -52,10 +52,10 @@ static int sha512_export(struct shash_desc *desc, void *out) return 0; } -static int sha512_import(struct shash_desc *desc, const u8 *in) +static int sha512_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha512_state *ictx = in; + const struct sha512_state *ictx = in; if (unlikely(ictx->count[1])) return -ERANGE; -- cgit v1.2.3 From 23b6c52cf558f2a1dc6c7010c601b3624eb4591f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 Sep 2009 01:07:45 +0200 Subject: x86: Decrease the level of some NUMA messages to KERN_DEBUG Some NUMA messages in srat_32.c are confusing to users, because they seem to indicate errors, while in fact they reflect normal behaviour. Decrease the level of these messages to KERN_DEBUG so that they don't show up unnecessarily. Signed-off-by: Rafael J. 
Wysocki LKML-Reference: <200909050107.45175.rjw@sisk.pl> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 29a0e37114f..6f8aa33031c 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void) goto out_fail; if (num_memory_chunks == 0) { - printk(KERN_WARNING + printk(KERN_DEBUG "could not find any ACPI SRAT memory areas.\n"); goto out_fail; } @@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void) } return 1; out_fail: - printk(KERN_ERR "failed to get NUMA memory information from SRAT" + printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" " table\n"); return 0; } -- cgit v1.2.3 From d535e4319a2e598ba25dc966ada4e52ea774e33f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 4 Sep 2009 15:53:09 +0200 Subject: x86: Make memtype_seq_ops const Signed-off-by: Tobias Klauser Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e6718bb2806..e2900a36de0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -826,7 +826,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations memtype_seq_ops = { +static const struct seq_operations memtype_seq_ops = { .start = memtype_seq_start, .next = memtype_seq_next, .stop = memtype_seq_stop, -- cgit v1.2.3 From 5dbc46506a4f7b9f564bb7589a49ed32bc1caa15 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Sat, 5 Sep 2009 03:59:49 +0000 Subject: IXP42x HSS support for setting internal clock rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HSS usually uses external clocks, so it's not a big deal. The internal clock is used for direct DTE-DTE connections and when the DCE doesn't provide its own clock. This also depends on the oscillator frequency. Intel seems to have calculated the clock register settings for 33.33 MHz (66.66 MHz timer base). Their settings seem quite suboptimal both in terms of average frequency (60 ppm is unacceptable for G.703 applications, their primary intended usage(?)) and jitter. Many (most?) platforms use a 33.333 MHz oscillator, a 10 ppm difference from Intel's base. Instead of creating static tables, I've created a procedure to program the HSS clock register. The register consists of 3 parts (A, B, C). The average frequency (= bit rate) is: 66.66x MHz / (A + (B + 1) / (C + 1)) The procedure aims at the closest average frequency, possibly at the cost of increased jitter. Nobody would be able to directly drive an unbuffered transmitter with an HSS anyway, and the frequency error is what really counts. I've verified the above with an oscilloscope on IXP425. It seems IXP46x and possibly IXP43x use a slightly different clock generation algorithm - it looks like the avg frequency is: (on IXP465) 66.66x MHz / (A + B / (C + 1)). Also they use much greater precomputed A and B - on IXP425 it would simply result in more jitter, but I don't know how it works on IXP46x (perhaps the 3 least significant bits aren't used?). Anyway it looks like they were aiming for exactly +60 ppm or -60 ppm, while <1 ppm is typically possible (with a synchronized clock, of course). The attached patch makes it possible to set almost any bit rate (my IXP425 533 MHz quits at > 22 Mb/s if a single port is used, and the minimum is ca.
65 Kb/s). This is independent of MVIP (multi-E1/T1 on one HSS) mode. Signed-off-by: Krzysztof Hałasa Signed-off-by: David S. Miller --- arch/arm/mach-ixp4xx/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 1e93dfee754..5083f03e9b5 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -416,6 +416,7 @@ static struct clocksource clocksource_ixp4xx = { }; unsigned long ixp4xx_timer_freq = FREQ; +EXPORT_SYMBOL(ixp4xx_timer_freq); static int __init ixp4xx_clocksource_init(void) { clocksource_ixp4xx.mult = -- cgit v1.2.3 From a8fae3ec5f118dc92517dcbed3ecf69ddb641d0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Sep 2009 18:32:32 +0200 Subject: sched: enable SD_WAKE_IDLE Now that SD_WAKE_IDLE doesn't make pipe-test suck anymore, enable it by default for MC, CPU and NUMA domains. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ef7bc7fc252..26d06e052a1 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -152,7 +152,7 @@ extern unsigned long node_remap_size[]; | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ | 1*SD_SERIALIZE \ - | 0*SD_WAKE_IDLE_FAR \ + | 1*SD_WAKE_IDLE_FAR \ | 0*SD_PREFER_SIBLING \ , \ .last_balance = jiffies, \ -- cgit v1.2.3 From 3e5cd1f2576c720f1d0705fdd7ba64f27e8836b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 16 Aug 2009 21:02:36 +0900 Subject: dmi: extend dmi_get_year() to dmi_get_date() There are cases where full date information is required instead of just the year. Add month and day parsing to dmi_get_year() and rename it to dmi_get_date(). As the original function only required '/' followed by any number of parseable characters at the end of the string, keep that behavior to avoid upsetting existing users. The new function takes dates of format [mm[/dd]]/yy[yy]. Year, month and date are checked to be in the ranges of [1-9999], [1-12] and [1-31] respectively and any invalid or out-of-range component is returned as zero. The dummy implementation is updated accordingly but the return value is updated to indicate field not found which is consistent with how other dummy functions behave. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- arch/x86/pci/direct.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd13c3e4c6d..347d882b3bb 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -192,13 +192,14 @@ struct pci_raw_ops pci_direct_conf2 = { static int __init pci_sanity_check(struct pci_raw_ops *o) { u32 x = 0; - int devfn; + int year, devfn; if (pci_probe & PCI_NO_CHECKS) return 1; /* Assume Type 1 works for newer systems. This handles machines that don't have anything on PCI Bus 0. */ - if (dmi_get_year(DMI_BIOS_DATE) >= 2001) + dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); + if (year >= 2001) return 1; for (devfn = 0; devfn < 0x100; devfn++) { -- cgit v1.2.3 From a8f22264550e64c0cd11fb6647284b0bd6407f9c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Sep 2009 23:16:06 -0700 Subject: sparc64: Manage NMI watchdog enabling like x86. 
Use a per-cpu 'wd_enabled' boolean and a global atomic_t count of watchdog NMI enabled cpus which is set to '-1' if something is wrong with the watchdog and it can't be used. Signed-off-by: David S. Miller --- arch/sparc/include/asm/nmi.h | 2 +- arch/sparc/kernel/nmi.c | 60 +++++++++++++++++++++++++++++++------------- arch/sparc/oprofile/init.c | 2 +- 3 files changed, 45 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h index fbd546dd4fe..c7d11e435df 100644 --- a/arch/sparc/include/asm/nmi.h +++ b/arch/sparc/include/asm/nmi.h @@ -5,6 +5,6 @@ extern int __init nmi_init(void); extern void perfctr_irq(int irq, struct pt_regs *regs); extern void nmi_adjust_hz(unsigned int new_hz); -extern int nmi_usable; +extern atomic_t nmi_active; #endif /* __NMI_H */ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 2c0cc72d295..d1614e8384a 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -34,10 +34,17 @@ static int nmi_watchdog_active; static int panic_on_timeout; -int nmi_usable; -EXPORT_SYMBOL_GPL(nmi_usable); +/* nmi_active: + * >0: the NMI watchdog is active, but can be disabled + * <0: the NMI watchdog has not been set up, and cannot be enabled + * 0: the NMI watchdog is disabled, but can be enabled + */ +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); static unsigned int nmi_hz = HZ; +static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata; static DEFINE_PER_CPU(unsigned int, last_irq_sum); static DEFINE_PER_CPU(local_t, alert_counter); @@ -110,7 +117,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); } - if (nmi_usable) { + if (__get_cpu_var(wd_enabled)) { write_pic(picl_value(nmi_hz)); pcr_ops->write(pcr_enable); } @@ -121,8 +128,6 @@ static inline unsigned int get_nmi_count(int cpu) return cpu_data(cpu).__nmi_count; } -static int endflag __initdata; - static __init void nmi_cpu_busy(void *data) { local_irq_enable_in_hardirq(); @@ -143,12 +148,15 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) printk(KERN_WARNING "and attach the output of the 'dmesg' command.\n"); - nmi_usable = 0; + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); } -static void stop_watchdog(void *unused) +static void stop_nmi_watchdog(void *unused) { pcr_ops->write(PCR_PIC_PRIV); + __get_cpu_var(wd_enabled) = 0; + atomic_dec(&nmi_active); } static int __init check_nmi_watchdog(void) @@ -156,6 +164,9 @@ static int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu, err; + if (!atomic_read(&nmi_active)) + return 0; + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL); if (!prev_nmi_count) { err = -ENOMEM; @@ -172,12 +183,15 @@ static int __init check_nmi_watchdog(void) mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ for_each_online_cpu(cpu) { + if (!per_cpu(wd_enabled, cpu)) + continue; if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) report_broken_nmi(cpu, prev_nmi_count); } endflag = 1; - if (!nmi_usable) { + if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); err = -ENODEV; goto error; } @@ -188,12 +202,26 @@ static int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; error: - on_each_cpu(stop_watchdog, NULL, 1); + on_each_cpu(stop_nmi_watchdog, NULL, 1); return err; } -static void start_watchdog(void *unused) +static void 
start_nmi_watchdog(void *unused) +{ + __get_cpu_var(wd_enabled) = 1; + atomic_inc(&nmi_active); + + pcr_ops->write(PCR_PIC_PRIV); + write_pic(picl_value(nmi_hz)); + + pcr_ops->write(pcr_enable); +} + +static void nmi_adjust_hz_one(void *unused) { + if (!__get_cpu_var(wd_enabled)) + return; + pcr_ops->write(PCR_PIC_PRIV); write_pic(picl_value(nmi_hz)); @@ -203,13 +231,13 @@ static void start_watchdog(void *unused) void nmi_adjust_hz(unsigned int new_hz) { nmi_hz = new_hz; - on_each_cpu(start_watchdog, NULL, 1); + on_each_cpu(nmi_adjust_hz_one, NULL, 1); } EXPORT_SYMBOL_GPL(nmi_adjust_hz); static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p) { - on_each_cpu(stop_watchdog, NULL, 1); + on_each_cpu(stop_nmi_watchdog, NULL, 1); return 0; } @@ -221,16 +249,14 @@ int __init nmi_init(void) { int err; - nmi_usable = 1; - - on_each_cpu(start_watchdog, NULL, 1); + on_each_cpu(start_nmi_watchdog, NULL, 1); err = check_nmi_watchdog(); if (!err) { err = register_reboot_notifier(&nmi_reboot_notifier); if (err) { - nmi_usable = 0; - on_each_cpu(stop_watchdog, NULL, 1); + on_each_cpu(stop_nmi_watchdog, NULL, 1); + atomic_set(&nmi_active, -1); } } return err; diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index d172f86439b..9ce34fd294c 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -57,7 +57,7 @@ static void timer_stop(void) static int op_nmi_timer_init(struct oprofile_operations *ops) { - if (!nmi_usable) + if (atomic_read(&nmi_active) <= 0) return -ENODEV; ops->start = timer_start; -- cgit v1.2.3 From 4e85f5915dedb7c852c1e1189aa4133c87798a5e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Sep 2009 23:20:39 -0700 Subject: sparc64: Kill unnecessary cast in profile_timer_exceptions_notify(). Signed-off-by: David S. Miller --- arch/sparc/oprofile/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index 9ce34fd294c..f97cb8b6ee5 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -21,7 +21,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; + struct die_args *args = data; int ret = NOTIFY_DONE; switch (val) { -- cgit v1.2.3 From d89be56b2153ccad7e8cd9e71b5d8b686632e8c2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Sep 2009 23:29:16 -0700 Subject: sparc64: Make touch_nmi_watchdog() actually work. It guards its actions on nmi_watchdog_active, but nothing ever sets that and its initial value is zero. Signed-off-by: David S. Miller --- arch/sparc/kernel/nmi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index d1614e8384a..47a465fe3c3 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -31,7 +31,6 @@ * level 14 as our IRQ off level.
*/ -static int nmi_watchdog_active; static int panic_on_timeout; /* nmi_active: @@ -52,7 +51,7 @@ static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) { - if (nmi_watchdog_active) { + if (atomic_read(&nmi_active)) { int cpu; for_each_present_cpu(cpu) { -- cgit v1.2.3 From 733e5e4b4eb1bc1e27acbe092200154051171426 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 9 Sep 2009 08:30:21 +0100 Subject: KEYS: Add missing linux/tracehook.h #inclusions Add #inclusions of linux/tracehook.h to those arch files that had the tracehook call for TIF_NOTIFY_RESUME added when support for that flag was added to that arch. Signed-off-by: David Howells Signed-off-by: James Morris --- arch/alpha/kernel/signal.c | 1 + arch/arm/kernel/signal.c | 1 + arch/avr32/kernel/signal.c | 1 + arch/cris/kernel/ptrace.c | 1 + arch/h8300/kernel/signal.c | 1 + arch/m32r/kernel/signal.c | 1 + arch/mips/kernel/signal.c | 1 + arch/parisc/kernel/signal.c | 1 + 8 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index d91aaa74705..0932dbb1ef8 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ea4ad3a43c8..b76fe06d92e 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index de9f7fe2aef..64f886fac2e 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index 32e9d5ee895..48b0f391263 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index abac3ee8c52..af842c369d2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index f80bac17c65..144b0f124fc 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index c2acf31874a..6254041b942 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 5ca1c02b805..8eb3c63c407 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 577eebeae34d340685d8985dfdb7dfe337c511e8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 27 Aug 2009 12:46:35 -0700 Subject: xen: make -fstack-protector work under Xen -fstack-protector uses a special per-cpu "stack canary" value. gcc generates special code in each function to test the canary to make sure that the function's stack hasn't been overrun. On x86-64, this is simply an offset of %gs, which is the usual per-cpu base segment register, so setting it up simply requires loading %gs's base as normal. 
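To make concrete what the generated code expects, a protected function behaves roughly like the C below. This is an illustration rather than kernel source: __stack_chk_guard and __stack_chk_fail are the generic gcc hooks, while the kernel's x86 builds instead fetch the canary %gs-relative out of the per-cpu area discussed here.

/* Sketch of what -fstack-protector adds to a function; gcc emits
 * the equivalent of the two marked steps automatically. */
extern unsigned long __stack_chk_guard;		/* canary value, initialized at boot */
extern void __stack_chk_fail(void);		/* called on detected corruption */

int copy_and_sum(const char *src, int len)
{
	unsigned long canary = __stack_chk_guard;	/* prologue: stash canary in the frame */
	char buf[64];
	int i, sum = 0;

	for (i = 0; i < len && i < (int)sizeof(buf); i++) {
		buf[i] = src[i];	/* an overflow here would clobber the stashed canary */
		sum += buf[i];
	}

	if (canary != __stack_chk_guard)	/* epilogue: verify before returning */
		__stack_chk_fail();
	return sum;
}

The only step that matters for this patch is the first one: the canary load must resolve through a valid segment base before any protected function runs, which is why the GDT and segment setup described below has to happen so early.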
On i386, the stack protector segment is %gs (rather than the usual kernel percpu %fs segment register). This requires setting up the full kernel GDT and then loading %gs accordingly. We also need to make sure %gs is initialized when bringing up secondary cpus too. To keep things consistent, we do the full GDT/segment register setup on both architectures. Because we need to avoid -fstack-protected code before setting up the GDT and because there's no way to disable it on a per-function basis, several files need to have stack-protector inhibited. [ Impact: allow Xen booting with stack-protector enabled ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/mm/Makefile | 4 ++ arch/x86/xen/Makefile | 2 + arch/x86/xen/enlighten.c | 131 ++++++++++++++++++++++++++++++++++++++++------- arch/x86/xen/smp.c | 1 + 4 files changed, 119 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index eefdeee8a87..72bb3a26c73 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,6 +1,10 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o +# Make sure __phys_addr has no stackprotector +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_ioremap.o := $(nostackp) + obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7410640db17..3bb4fc21f4f 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -8,6 +8,7 @@ endif # Make sure early boot has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_enlighten.o := $(nostackp) +CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ @@ -16,3 +17,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o + diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index eb33aaa8415..76143136dea 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -330,18 +331,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr) unsigned long frames[pages]; int f; - /* A GDT can be up to 64k in size, which corresponds to 8192 - 8-byte entries, or 16 4k pages.. */ + /* + * A GDT can be up to 64k in size, which corresponds to 8192 + * 8-byte entries, or 16 4k pages.. + */ BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { int level; - pte_t *ptep = lookup_address(va, &level); + pte_t *ptep; unsigned long pfn, mfn; void *virt; + /* + * The GDT is per-cpu and is in the percpu data area. + * That can be virtually mapped, so we need to do a + * page-walk to get the underlying MFN for the + * hypercall. The page can also be in the kernel's + * linear range, so we need to RO that mapping too. 
+ */ + ptep = lookup_address(va, &level); BUG_ON(ptep == NULL); pfn = pte_pfn(*ptep); @@ -358,6 +369,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG(); } +/* + * load_gdt for early boot, when the gdt is only mapped once + */ +static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) +{ + unsigned long va = dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; + int f; + + /* + * A GDT can be up to 64k in size, which corresponds to 8192 + * 8-byte entries, or 16 4k pages.. + */ + + BUG_ON(size > 65536); + BUG_ON(va & ~PAGE_MASK); + + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + pte_t pte; + unsigned long pfn, mfn; + + pfn = virt_to_pfn(va); + mfn = pfn_to_mfn(pfn); + + pte = pfn_pte(pfn, PAGE_KERNEL_RO); + + if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); + + frames[f] = mfn; + } + + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); +} + static void load_TLS_descriptor(struct thread_struct *t, unsigned int cpu, unsigned int i) { @@ -581,6 +630,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, preempt_enable(); } +/* + * Version of write_gdt_entry for use at early boot-time needed to + * update an entry as simply as possible. + */ +static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, + const void *desc, int type) +{ + switch (type) { + case DESC_LDT: + case DESC_TSS: + /* ignore */ + break; + + default: { + xmaddr_t maddr = virt_to_machine(&dt[entry]); + + if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) + dt[entry] = *(struct desc_struct *)desc; + } + + } +} + static void xen_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -965,6 +1037,23 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; +/* + * Set up the GDT and segment registers for -fstack-protector. Until + * we do this, we have to be careful not to call any stack-protected + * function, which is most of the kernel. + */ +static void __init xen_setup_stackprotector(void) +{ + pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; + pv_cpu_ops.load_gdt = xen_load_gdt_boot; + + setup_stack_canary_segment(0); + switch_to_new_gdt(0); + + pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; + pv_cpu_ops.load_gdt = xen_load_gdt; +} + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -983,13 +1072,28 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; -#ifdef CONFIG_X86_64 /* - * Setup percpu state. We only need to do this for 64-bit - * because 32-bit already has %fs set properly. + * Set up some pagetable state before starting to set any ptes. */ - load_percpu_segment(0); -#endif + + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; + if (!xen_initial_domain()) + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); + + __supported_pte_mask |= _PAGE_IOMAP; + + xen_setup_features(); + + /* Get mfn list */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + xen_build_dynamic_phys_to_machine(); + + /* + * Set up kernel GDT and segment registers, mainly so that + * -fstack-protector code can be executed. 
+ */ + xen_setup_stackprotector(); xen_init_irq_ops(); xen_init_cpuid_mask(); @@ -1001,8 +1105,6 @@ asmlinkage void __init xen_start_kernel(void) set_xen_basic_apic_ops(); #endif - xen_setup_features(); - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; @@ -1019,17 +1121,8 @@ asmlinkage void __init xen_start_kernel(void) xen_smp_init(); - /* Get mfn list */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_build_dynamic_phys_to_machine(); - pgd = (pgd_t *)xen_start_info->pt_base; - /* Prevent unwanted bits from being set in PTEs. */ - __supported_pte_mask &= ~_PAGE_GLOBAL; - if (!xen_initial_domain()) - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - #ifdef CONFIG_X86_64 /* Work out if we support NX */ check_efer(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 429834ec168..fe03eeed7b4 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; + ctxt->user_regs.gs = __KERNEL_STACK_CANARY; #else ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif -- cgit v1.2.3 From 4d576b57b50a92801e6493e76e5243d6cff193d2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 9 Sep 2009 12:33:51 -0700 Subject: xen: only enable interrupts while actually blocking for spinlock Where possible we enable interrupts while waiting for a spinlock to become free, in order to reduce big latency spikes in interrupt handling. However, at present if we manage to pick up the spinlock just before blocking, we'll end up holding the lock with interrupts enabled for a while. This will cause a deadlock if we receive an interrupt in that window, and the interrupt handler tries to take the lock too. Solve this by shrinking the interrupt-enabled region to just around the blocking call. [ Impact: avoid race/deadlock when using Xen PV spinlocks ] Reported-by: "Yang, Xiaowei" Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/spinlock.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 5601506f2dd..2f91e565192 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -187,7 +187,6 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl struct xen_spinlock *prev; int irq = __get_cpu_var(lock_kicker_irq); int ret; - unsigned long flags; u64 start; /* If kicker interrupts not initialized yet, just spin */ @@ -199,16 +198,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl /* announce we're spinning */ prev = spinning_lock(xl); - flags = __raw_local_save_flags(); - if (irq_enable) { - ADD_STATS(taken_slow_irqenable, 1); - raw_local_irq_enable(); - } - ADD_STATS(taken_slow, 1); ADD_STATS(taken_slow_nested, prev != NULL); do { + unsigned long flags; + /* clear pending */ xen_clear_irq_pending(irq); @@ -228,6 +223,12 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl goto out; } + flags = __raw_local_save_flags(); + if (irq_enable) { + ADD_STATS(taken_slow_irqenable, 1); + raw_local_irq_enable(); + } + /* * Block until irq becomes pending. 
If we're * interrupted at this point (after the trylock but @@ -238,13 +239,15 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl * pending. */ xen_poll_irq(irq); + + raw_local_irq_restore(flags); + ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); out: - raw_local_irq_restore(flags); unspinning_lock(xl, prev); spin_time_accum_blocked(start); -- cgit v1.2.3 From 2496afbf1e50c70f80992656bcb730c8583ddac3 Mon Sep 17 00:00:00 2001 From: Yang Xiaowei Date: Wed, 9 Sep 2009 12:44:52 -0700 Subject: xen: use stronger barrier after unlocking lock We need to have a stronger barrier between releasing the lock and checking for any waiting spinners. A compiler barrier is not sufficient because the CPU's ordering rules do not prevent the read xl->spinners from happening before the unlock assignment, as they are different memory locations. We need to have an explicit barrier to enforce the write-read ordering to different memory locations. Because of it, I can't bring up > 4 HVM guests on one SMP machine. [ Code and commit comments expanded -J ] [ Impact: avoid deadlock when using Xen PV spinlocks ] Signed-off-by: Yang Xiaowei Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/spinlock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 2f91e565192..36a5141108d 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -326,8 +326,13 @@ static void xen_spin_unlock(struct raw_spinlock *lock) smp_wmb(); /* make sure no writes get moved after unlock */ xl->lock = 0; /* release lock */ - /* make sure unlock happens before kick */ - barrier(); + /* + * Make sure unlock happens before checking for waiting + * spinners. We need a strong barrier to enforce the + * write-read ordering to different memory locations, as the + * CPU makes no implied guarantees about their ordering. + */ + mb(); if (unlikely(xl->spinners)) xen_spin_unlock_slow(xl); -- cgit v1.2.3 From 76c36d016a3f958d6231dcdef54a132614c3b578 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 05:55:17 -0700 Subject: sparc64: Provide extern decls for sparc_??u_type strings. Signed-off-by: David S. Miller --- arch/sparc/include/asm/system_64.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h index 6c077816ab2..25e848f0cad 100644 --- a/arch/sparc/include/asm/system_64.h +++ b/arch/sparc/include/asm/system_64.h @@ -29,6 +29,10 @@ enum sparc_cpu { /* This cannot ever be a sun4c :) That's just history. */ #define ARCH_SUN4C 0 +extern const char *sparc_cpu_type; +extern const char *sparc_fpu_type; +extern const char *sparc_pmu_type; + extern char reboot_command[]; /* These are here in an effort to more fully work around Spitfire Errata -- cgit v1.2.3 From 2d0740c4562493b60f59ca9b0330a2d5e01d43ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 05:56:16 -0700 Subject: sparc64: Use nmi_enter() and nmi_exit(), as needed. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/nmi.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 47a465fe3c3..7de19dd30f4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -84,6 +84,7 @@ static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) if (do_panic || panic_on_oops) panic("Non maskable interrupt"); + nmi_exit(); local_irq_enable(); do_exit(SIGBUS); } @@ -98,6 +99,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) local_cpu_data().__nmi_count++; + nmi_enter(); + if (notify_die(DIE_NMI, "nmi", regs, 0, pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; @@ -120,6 +123,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) write_pic(picl_value(nmi_hz)); pcr_ops->write(pcr_enable); } + + nmi_exit(); } static inline unsigned int get_nmi_count(int cpu) -- cgit v1.2.3 From 5686f9c3d67d5a20108fa26105c98b042df13123 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 05:59:24 -0700 Subject: sparc64: Implement a real set_perf_counter_pending(). When the perf counter subsystem needs to reschedule work out from an NMI, it invokes set_perf_counter_pending(). This triggers a non-NMI irq which should invoke perf_counter_do_pending(). Currently this won't trigger because sparc64 won't trigger the perf counter subsystem from NMIs, but when the HW counter support is added it will. Signed-off-by: David S. Miller --- arch/sparc/include/asm/perf_counter.h | 2 +- arch/sparc/kernel/pcr.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h index f07c587a830..38d64454643 100644 --- a/arch/sparc/include/asm/perf_counter.h +++ b/arch/sparc/include/asm/perf_counter.h @@ -1,7 +1,7 @@ #ifndef __ASM_SPARC_PERF_COUNTER_H #define __ASM_SPARC_PERF_COUNTER_H -static inline void set_perf_counter_pending(void) { } +extern void set_perf_counter_pending(void); #define PERF_COUNTER_INDEX_OFFSET 0 diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 1ae8cdd7e70..68ff0010707 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,6 +7,8 @@ #include #include +#include + #include #include #include @@ -34,10 +36,20 @@ unsigned int picl_shift; */ void deferred_pcr_work_irq(int irq, struct pt_regs *regs) { + struct pt_regs *old_regs; + clear_softint(1 << PIL_DEFERRED_PCR_WORK); + + old_regs = set_irq_regs(regs); + irq_enter(); +#ifdef CONFIG_PERF_COUNTERS + perf_counter_do_pending(); +#endif + irq_exit(); + set_irq_regs(old_regs); } -void schedule_deferred_pcr_work(void) +void set_perf_counter_pending(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } -- cgit v1.2.3 From 59abbd1e7cfd6018fb8e58a96aa562aaff8711e7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 06:28:20 -0700 Subject: sparc64: Initial hw perf counter support. Only supports one simple counter and only UltraSPARC-IIIi chips. Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/nmi.h | 3 + arch/sparc/include/asm/perf_counter.h | 6 + arch/sparc/kernel/Makefile | 3 + arch/sparc/kernel/nmi.c | 8 +- arch/sparc/kernel/perf_counter.c | 520 ++++++++++++++++++++++++++++++++++ 5 files changed, 538 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/kernel/perf_counter.c (limited to 'arch') diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h index c7d11e435df..72e6500e7ab 100644 --- a/arch/sparc/include/asm/nmi.h +++ b/arch/sparc/include/asm/nmi.h @@ -7,4 +7,7 @@ extern void nmi_adjust_hz(unsigned int new_hz); extern atomic_t nmi_active; +extern void start_nmi_watchdog(void *unused); +extern void stop_nmi_watchdog(void *unused); + #endif /* __NMI_H */ diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h index 38d64454643..5d7a8ca0e49 100644 --- a/arch/sparc/include/asm/perf_counter.h +++ b/arch/sparc/include/asm/perf_counter.h @@ -5,4 +5,10 @@ extern void set_perf_counter_pending(void); #define PERF_COUNTER_INDEX_OFFSET 0 +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +#else +static inline void init_hw_perf_counters(void) { } +#endif + #endif diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index b0041756c9f..f96dc5761f7 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -103,3 +103,6 @@ obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o obj-$(CONFIG_AUDIT) += audit.o audit--$(CONFIG_AUDIT) := compat_audit.o obj-$(CONFIG_COMPAT) += $(audit--y) + +pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o +obj-$(CONFIG_SPARC64) += $(pc--y) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 7de19dd30f4..391a6ed9a18 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -156,7 +157,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) atomic_dec(&nmi_active); } -static void stop_nmi_watchdog(void *unused) +void stop_nmi_watchdog(void *unused) { pcr_ops->write(PCR_PIC_PRIV); __get_cpu_var(wd_enabled) = 0; @@ -210,7 +211,7 @@ error: return err; } -static void start_nmi_watchdog(void *unused) +void start_nmi_watchdog(void *unused) { __get_cpu_var(wd_enabled) = 1; atomic_inc(&nmi_active); @@ -263,6 +264,9 @@ int __init nmi_init(void) atomic_set(&nmi_active, -1); } } + if (!err) + init_hw_perf_counters(); + return err; } diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c new file mode 100644 index 00000000000..f2c781450d0 --- /dev/null +++ b/arch/sparc/kernel/perf_counter.c @@ -0,0 +1,520 @@ +/* Performance counter support for sparc64. + * + * Copyright (C) 2009 David S. Miller + * + * This code is based almost entirely upon the x86 perf counter + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Sparc64 chips have two performance counters, 32-bits each, with + * overflow interrupts generated on transition from 0xffffffff to 0. + * The counters are accessed in one go using a 64-bit register. + * + * Both counters are controlled using a single control register. 
The + * only way to stop all sampling is to clear all of the context (user, + * supervisor, hypervisor) sampling enable bits. But these bits apply + * to both counters, thus the two counters can't be enabled/disabled + * individually. + * + * The control register has two event fields, one for each of the two + * counters. It's thus nearly impossible to have one counter going + * while keeping the other one stopped. Therefore it is possible to + * get overflow interrupts for counters not currently "in use" and + * that condition must be checked in the overflow interrupt handler. + * + * So we use a hack, in that we program inactive counters with the + * "sw_count0" and "sw_count1" events. These count how many times + * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an + * unusual way to encode a NOP and therefore will not trigger in + * normal code. + */ + +#define MAX_HWCOUNTERS 2 +#define MAX_PERIOD ((1UL << 32) - 1) + +#define PIC_UPPER_INDEX 0 +#define PIC_LOWER_INDEX 1 + +#define PIC_UPPER_NOP 0x1c +#define PIC_LOWER_NOP 0x14 + +struct cpu_hw_counters { + struct perf_counter *counters[MAX_HWCOUNTERS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; + int enabled; +}; +DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; + +struct perf_event_map { + u16 encoding; + u8 pic_mask; +#define PIC_NONE 0x00 +#define PIC_UPPER 0x01 +#define PIC_LOWER 0x02 +}; + +struct sparc_pmu { + const struct perf_event_map *(*event_map)(int); + int max_events; + int upper_shift; + int lower_shift; + int event_mask; +}; + +static const struct perf_event_map ultra3i_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, +}; + +static const struct perf_event_map *ultra3i_event_map(int event) +{ + return &ultra3i_perfmon_event_map[event]; +} + +static const struct sparc_pmu ultra3i_pmu = { + .event_map = ultra3i_event_map, + .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), + .upper_shift = 11, + .lower_shift = 4, + .event_mask = 0x3f, +}; + +static const struct sparc_pmu *sparc_pmu __read_mostly; + +static u64 event_encoding(u64 event, int idx) +{ + if (idx == PIC_UPPER_INDEX) + event <<= sparc_pmu->upper_shift; + else + event <<= sparc_pmu->lower_shift; + return event; +} + +static u64 mask_for_index(int idx) +{ + return event_encoding(sparc_pmu->event_mask, idx); +} + +static u64 nop_for_index(int idx) +{ + return event_encoding(idx == PIC_UPPER_INDEX ? 
+ PIC_UPPER_NOP : PIC_LOWER_NOP, idx); +} + +static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc, + int idx) +{ + u64 val, mask = mask_for_index(idx); + + val = pcr_ops->read(); + pcr_ops->write((val & ~mask) | hwc->config); +} + +static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc, + int idx) +{ + u64 mask = mask_for_index(idx); + u64 nop = nop_for_index(idx); + u64 val = pcr_ops->read(); + + pcr_ops->write((val & ~mask) | nop); +} + +void hw_perf_enable(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val; + int i; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + val = pcr_ops->read(); + + for (i = 0; i < MAX_HWCOUNTERS; i++) { + struct perf_counter *cp = cpuc->counters[i]; + struct hw_perf_counter *hwc; + + if (!cp) + continue; + hwc = &cp->hw; + val |= hwc->config_base; + } + + pcr_ops->write(val); +} + +void hw_perf_disable(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + + val = pcr_ops->read(); + val &= ~(PCR_UTRACE | PCR_STRACE); + pcr_ops->write(val); +} + +static u32 read_pmc(int idx) +{ + u64 val; + + read_pic(val); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + read_pic(pic); + pic &= ~mask; + pic |= val; + write_pic(pic); +} + +static int sparc_perf_counter_set_period(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > MAX_PERIOD) + left = MAX_PERIOD; + + atomic64_set(&hwc->prev_count, (u64)-left); + + write_pmc(idx, (u64)(-left) & 0xffffffff); + + perf_counter_update_userpage(counter); + + return ret; +} + +static int sparc_pmu_enable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct hw_perf_counter *hwc = &counter->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + sparc_pmu_disable_counter(hwc, idx); + + cpuc->counters[idx] = counter; + set_bit(idx, cpuc->active_mask); + + sparc_perf_counter_set_period(counter, hwc, idx); + sparc_pmu_enable_counter(hwc, idx); + perf_counter_update_userpage(counter); + return 0; +} + +static u64 sparc_perf_counter_update(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) +{ + int shift = 64 - 32; + u64 prev_raw_count, new_raw_count; + s64 delta; + +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = read_pmc(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void sparc_pmu_disable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct hw_perf_counter *hwc = &counter->hw; + int 
idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sparc_pmu_disable_counter(hwc, idx); + + barrier(); + + sparc_perf_counter_update(counter, hwc, idx); + cpuc->counters[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_counter_update_userpage(counter); +} + +static void sparc_pmu_read(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + sparc_perf_counter_update(counter, hwc, hwc->idx); +} + +static void sparc_pmu_unthrottle(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + sparc_pmu_enable_counter(hwc, hwc->idx); +} + +static atomic_t active_counters = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmc_grab_mutex); + +void perf_counter_grab_pmc(void) +{ + if (atomic_inc_not_zero(&active_counters)) + return; + + mutex_lock(&pmc_grab_mutex); + if (atomic_read(&active_counters) == 0) { + if (atomic_read(&nmi_active) > 0) { + on_each_cpu(stop_nmi_watchdog, NULL, 1); + BUG_ON(atomic_read(&nmi_active) != 0); + } + atomic_inc(&active_counters); + } + mutex_unlock(&pmc_grab_mutex); +} + +void perf_counter_release_pmc(void) +{ + if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) { + if (atomic_read(&nmi_active) == 0) + on_each_cpu(start_nmi_watchdog, NULL, 1); + mutex_unlock(&pmc_grab_mutex); + } +} + +static void hw_perf_counter_destroy(struct perf_counter *counter) +{ + perf_counter_release_pmc(); +} + +static int __hw_perf_counter_init(struct perf_counter *counter) +{ + struct perf_counter_attr *attr = &counter->attr; + struct hw_perf_counter *hwc = &counter->hw; + const struct perf_event_map *pmap; + u64 enc; + + if (atomic_read(&nmi_active) < 0) + return -ENODEV; + + if (attr->type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + + perf_counter_grab_pmc(); + counter->destroy = hw_perf_counter_destroy; + + /* We save the enable bits in the config_base. So to + * turn off sampling just write 'config', and to enable + * things write 'config | config_base'. 
+ */ + hwc->config_base = 0; + if (!attr->exclude_user) + hwc->config_base |= PCR_UTRACE; + if (!attr->exclude_kernel) + hwc->config_base |= PCR_STRACE; + + if (!hwc->sample_period) { + hwc->sample_period = MAX_PERIOD; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } + + pmap = sparc_pmu->event_map(attr->config); + + enc = pmap->encoding; + if (pmap->pic_mask & PIC_UPPER) { + hwc->idx = PIC_UPPER_INDEX; + enc <<= sparc_pmu->upper_shift; + } else { + hwc->idx = PIC_LOWER_INDEX; + enc <<= sparc_pmu->lower_shift; + } + + hwc->config |= enc; + return 0; +} + +static const struct pmu pmu = { + .enable = sparc_pmu_enable, + .disable = sparc_pmu_disable, + .read = sparc_pmu_read, + .unthrottle = sparc_pmu_unthrottle, +}; + +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) +{ + int err = __hw_perf_counter_init(counter); + + if (err) + return ERR_PTR(err); + return &pmu; +} + +void perf_counter_print_debug(void) +{ + unsigned long flags; + u64 pcr, pic; + int cpu; + + if (!sparc_pmu) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = pcr_ops->read(); + read_pic(pic); + + pr_info("\n"); + pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", + cpu, pcr, pic); + + local_irq_restore(flags); +} + +static int __kprobes perf_counter_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct perf_sample_data data; + struct cpu_hw_counters *cpuc; + struct pt_regs *regs; + int idx; + + if (!atomic_read(&active_counters)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + data.regs = regs; + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_counters); + for (idx = 0; idx < MAX_HWCOUNTERS; idx++) { + struct perf_counter *counter = cpuc->counters[idx]; + struct hw_perf_counter *hwc; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + hwc = &counter->hw; + val = sparc_perf_counter_update(counter, hwc, idx); + if (val & (1ULL << 31)) + continue; + + data.period = counter->hw.last_period; + if (!sparc_perf_counter_set_period(counter, hwc, idx)) + continue; + + if (perf_counter_overflow(counter, 1, &data)) + sparc_pmu_disable_counter(hwc, idx); + } + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_counter_nmi_notifier = { + .notifier_call = perf_counter_nmi_handler, +}; + +static bool __init supported_pmu(void) +{ + if (!strcmp(sparc_pmu_type, "ultra3i")) { + sparc_pmu = &ultra3i_pmu; + return true; + } + return false; +} + +void __init init_hw_perf_counters(void) +{ + pr_info("Performance counters: "); + + if (!supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return; + } + + pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); + + /* All sparc64 PMUs currently have 2 counters. But this simple + * driver only supports one active counter at a time. + */ + perf_max_counters = 1; + + register_die_notifier(&perf_counter_nmi_notifier); +} -- cgit v1.2.3 From 91b9286d819b821fd742c0053fe0748818374198 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 07:09:06 -0700 Subject: sparc64: Provide hypervisor tracing bit support for perf counters. A PMU need only specify which bit in the PCR enabled hypervisor tracing in order to enable this. This will be used in Niagara-2 perf counter support. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/perf_counter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c index f2c781450d0..d86009fa6f8 100644 --- a/arch/sparc/kernel/perf_counter.c +++ b/arch/sparc/kernel/perf_counter.c @@ -77,6 +77,7 @@ struct sparc_pmu { int upper_shift; int lower_shift; int event_mask; + int hv_bit; }; static const struct perf_event_map ultra3i_perfmon_event_map[] = { @@ -178,7 +179,7 @@ void hw_perf_disable(void) cpuc->enabled = 0; val = pcr_ops->read(); - val &= ~(PCR_UTRACE | PCR_STRACE); + val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit); pcr_ops->write(val); } @@ -377,6 +378,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->config_base |= PCR_UTRACE; if (!attr->exclude_kernel) hwc->config_base |= PCR_STRACE; + if (!attr->exclude_hv) + hwc->config_base |= sparc_pmu->hv_bit; if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; -- cgit v1.2.3 From 496c07e3b43475124d7f2d77fafbc1f5055abfee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 07:10:59 -0700 Subject: sparc64: Provide a way to specify a perf counter overflow IRQ enable bit. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_counter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c index d86009fa6f8..0cd5487b341 100644 --- a/arch/sparc/kernel/perf_counter.c +++ b/arch/sparc/kernel/perf_counter.c @@ -78,6 +78,7 @@ struct sparc_pmu { int lower_shift; int event_mask; int hv_bit; + int irq_bit; }; static const struct perf_event_map ultra3i_perfmon_event_map[] = { @@ -179,7 +180,8 @@ void hw_perf_disable(void) cpuc->enabled = 0; val = pcr_ops->read(); - val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit); + val &= ~(PCR_UTRACE | PCR_STRACE | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); pcr_ops->write(val); } @@ -373,7 +375,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * turn off sampling just write 'config', and to enable * things write 'config | config_base'. */ - hwc->config_base = 0; + hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) hwc->config_base |= PCR_UTRACE; if (!attr->exclude_kernel) -- cgit v1.2.3 From 660d13765f342ecb6e774fcbc35b64beb0a069da Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 07:13:26 -0700 Subject: sparc64: Perf counter 'nop' event is not constant. On Niagara-2, for example, it's going to be different. So make it something specified in sparc_pmu. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/perf_counter.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c index 0cd5487b341..f3b8ccca3c9 100644 --- a/arch/sparc/kernel/perf_counter.c +++ b/arch/sparc/kernel/perf_counter.c @@ -52,9 +52,6 @@ #define PIC_UPPER_INDEX 0 #define PIC_LOWER_INDEX 1 -#define PIC_UPPER_NOP 0x1c -#define PIC_LOWER_NOP 0x14 - struct cpu_hw_counters { struct perf_counter *counters[MAX_HWCOUNTERS]; unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; @@ -79,6 +76,8 @@ struct sparc_pmu { int event_mask; int hv_bit; int irq_bit; + int upper_nop; + int lower_nop; }; static const struct perf_event_map ultra3i_perfmon_event_map[] = { @@ -99,6 +98,8 @@ static const struct sparc_pmu ultra3i_pmu = { .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, + .upper_nop = 0x1c, + .lower_nop = 0x14, }; static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -120,7 +121,8 @@ static u64 mask_for_index(int idx) static u64 nop_for_index(int idx) { return event_encoding(idx == PIC_UPPER_INDEX ? - PIC_UPPER_NOP : PIC_LOWER_NOP, idx); + sparc_pmu->upper_nop : + sparc_pmu->lower_nop, idx); } static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc, -- cgit v1.2.3 From b73d884756303316ead4cd7dad51236b2a515a1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2009 07:22:18 -0700 Subject: sparc64: Initial niagara2 perf counter support. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_counter.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c index f3b8ccca3c9..09de4035eaa 100644 --- a/arch/sparc/kernel/perf_counter.c +++ b/arch/sparc/kernel/perf_counter.c @@ -102,6 +102,32 @@ static const struct sparc_pmu ultra3i_pmu = { .lower_nop = 0x14, }; +static const struct perf_event_map niagara2_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, +}; + +static const struct perf_event_map *niagara2_event_map(int event) +{ + return &niagara2_perfmon_event_map[event]; +} + +static const struct sparc_pmu niagara2_pmu = { + .event_map = niagara2_event_map, + .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .upper_shift = 19, + .lower_shift = 6, + .event_mask = 0xfff, + .hv_bit = 0x8, + .irq_bit = 0x03, + .upper_nop = 0x220, + .lower_nop = 0x220, +}; + static const struct sparc_pmu *sparc_pmu __read_mostly; static u64 event_encoding(u64 event, int idx) @@ -504,6 +530,10 @@ static bool __init supported_pmu(void) sparc_pmu = &ultra3i_pmu; return true; } + if (!strcmp(sparc_pmu_type, "niagara2")) { + sparc_pmu = &niagara2_pmu; + return true; + } return false; } -- cgit v1.2.3 From 78c86e5e5691fc84d5fbea0cd4ac7147e87b7490 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 10 Sep 2009 10:09:38 -0700 Subject: x86: split __phys_addr out into separate file Split __phys_addr out into its own file so we can disable -fstack-protector in a fine-grained fashion. Also it doesn't have terribly much to do with the rest of ioremap.c. 
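As a quick orientation for the move: the x86-64 half of the relocated helper (quoted in full in the diff below) reduces to a two-range translation. The standalone sketch that follows restates that logic with the debug checks dropped; the DEMO_* constants are illustrative stand-ins for the kernel's __START_KERNEL_map and PAGE_OFFSET, using the x86-64 values of this era.

#include <stdint.h>

#define DEMO_START_KERNEL_MAP	0xffffffff80000000UL	/* kernel text mapping */
#define DEMO_PAGE_OFFSET	0xffff880000000000UL	/* direct mapping of all RAM */

static uint64_t demo_phys_base;	/* physical relocation offset of the kernel image */

/* Invert whichever of the two kernel mappings covers 'x'. */
static uint64_t demo_phys_addr(uint64_t x)
{
	if (x >= DEMO_START_KERNEL_MAP)
		return x - DEMO_START_KERNEL_MAP + demo_phys_base;
	return x - DEMO_PAGE_OFFSET;	/* direct-map (lowmem) case */
}

Only addresses in the text mapping need phys_base; the direct map sits at a fixed offset from physical memory regardless of where the image was loaded.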
Signed-off-by: Jeremy Fitzhardinge --- arch/x86/mm/Makefile | 4 +-- arch/x86/mm/ioremap.c | 72 +------------------------------------------------- arch/x86/mm/physaddr.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/physaddr.h | 10 +++++++ 4 files changed, 83 insertions(+), 73 deletions(-) create mode 100644 arch/x86/mm/physaddr.c create mode 100644 arch/x86/mm/physaddr.h (limited to 'arch') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 72bb3a26c73..9b5a9f59a47 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,9 +1,9 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o gup.o + pat.o pgtable.o physaddr.o gup.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_ioremap.o := $(nostackp) +CFLAGS_physaddr.o := $(nostackp) obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8a450930834..04e1ad60c63 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,77 +22,7 @@ #include #include -static inline int phys_addr_valid(resource_size_t addr) -{ -#ifdef CONFIG_PHYS_ADDR_T_64BIT - return !(addr >> boot_cpu_data.x86_phys_bits); -#else - return 1; -#endif -} - -#ifdef CONFIG_X86_64 - -unsigned long __phys_addr(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; - } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); - } - return x; -} -EXPORT_SYMBOL(__phys_addr); - -bool __virt_addr_valid(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) - return false; - x += phys_base; - } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) - return false; - } - - return pfn_valid(x >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#else - -#ifdef CONFIG_DEBUG_VIRTUAL -unsigned long __phys_addr(unsigned long x) -{ - /* VMALLOC_* aren't constants */ - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; -} -EXPORT_SYMBOL(__phys_addr); -#endif - -bool __virt_addr_valid(unsigned long x) -{ - if (x < PAGE_OFFSET) - return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) - return false; - if (x >= FIXADDR_START) - return false; - return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#endif +#include "physaddr.h" int page_is_ram(unsigned long pagenr) { diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c new file mode 100644 index 00000000000..d2e2735327b --- /dev/null +++ b/arch/x86/mm/physaddr.c @@ -0,0 +1,70 @@ +#include +#include +#include + +#include + +#include "physaddr.h" + +#ifdef CONFIG_X86_64 + +unsigned long __phys_addr(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); + x += phys_base; + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; + VIRTUAL_BUG_ON(!phys_addr_valid(x)); + } + return x; +} +EXPORT_SYMBOL(__phys_addr); + +bool __virt_addr_valid(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + if (x >= KERNEL_IMAGE_SIZE) + return false; + x += phys_base; + } else { + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; + if (!phys_addr_valid(x)) + return false; + } + + return pfn_valid(x >> PAGE_SHIFT); +} 
+EXPORT_SYMBOL(__virt_addr_valid); + +#else + +#ifdef CONFIG_DEBUG_VIRTUAL +unsigned long __phys_addr(unsigned long x) +{ + /* VMALLOC_* aren't constants */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + return x - PAGE_OFFSET; +} +EXPORT_SYMBOL(__phys_addr); +#endif + +bool __virt_addr_valid(unsigned long x) +{ + if (x < PAGE_OFFSET) + return false; + if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) + return false; + if (x >= FIXADDR_START) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/mm/physaddr.h b/arch/x86/mm/physaddr.h new file mode 100644 index 00000000000..a3cd5a0c97b --- /dev/null +++ b/arch/x86/mm/physaddr.h @@ -0,0 +1,10 @@ +#include + +static inline int phys_addr_valid(resource_size_t addr) +{ +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif +} -- cgit v1.2.3 From 5367b6887e7d8c870a5da7d9b8c6e9c207684e43 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 10 Sep 2009 02:53:50 +0100 Subject: x86: Fix code patching for paravirt-alternatives on 486 As reported in and , kernels with paravirt-alternatives enabled crash in text_poke_early() on at least some 486-class processors. The problem is that text_poke_early() itself uses inline functions affected by paravirt-alternatives and so will modify instructions that have already been prefetched. Pentium and later processors will invalidate the prefetched instructions in this case, but 486-class processors do not. Change sync_core() to limit prefetching on 486-class (and 386-class) processors, and move the call to sync_core() above the call to the modifiable local_irq_restore(). Signed-off-by: Ben Hutchings LKML-Reference: <1252547631.3423.134.camel@localhost> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 16 +++++++++++++--- arch/x86/kernel/alternative.c | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7768269b1c..2db56c57a28 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -703,13 +703,23 @@ static inline void cpu_relax(void) rep_nop(); } -/* Stop speculative execution: */ +/* Stop speculative execution and prefetching of modified code. */ static inline void sync_core(void) { int tmp; - asm volatile("cpuid" : "=a" (tmp) : "0" (1) - : "ebx", "ecx", "edx", "memory"); +#if defined(CONFIG_M386) || defined(CONFIG_M486) + if (boot_cpu_data.x86 < 5) + /* There is no speculative execution. + * jmp is a barrier to prefetching. */ + asm volatile("jmp 1f\n1:\n" ::: "memory"); + else +#endif + /* cpuid is a barrier to speculative execution. + * Prefetched instructions are automatically + * invalidated when modified. 
*/ + asm volatile("cpuid" : "=a" (tmp) : "0" (1) + : "ebx", "ecx", "edx", "memory"); } static inline void __monitor(const void *eax, unsigned long ecx, diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f5765870257..b8ebd0b689b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -490,8 +490,8 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) unsigned long flags; local_irq_save(flags); memcpy(addr, opcode, len); - local_irq_restore(flags); sync_core(); + local_irq_restore(flags); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ return addr; -- cgit v1.2.3 From 62733e5a5a480a893e56fa6133ae90904d857bc4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:15 +0200 Subject: [S390] cio: move scsw helper functions to header file All scsw helper functions are very short and usage of them shouldn't result in function calls. Therefore we move them to a separate header file. Also saves a lot of EXPORT_SYMBOLs. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/chsc.h | 28 ++ arch/s390/include/asm/cio.h | 223 +--------- arch/s390/include/asm/scsw.h | 956 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 985 insertions(+), 222 deletions(-) create mode 100644 arch/s390/include/asm/scsw.h (limited to 'arch') diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h index 807997f7414..4943654ed7f 100644 --- a/arch/s390/include/asm/chsc.h +++ b/arch/s390/include/asm/chsc.h @@ -125,4 +125,32 @@ struct chsc_cpd_info { #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#ifdef __KERNEL__ + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 28; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 14; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 7; +}__attribute__((packed)); + +extern struct css_general_char css_general_characteristics; + +#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 619bf94b11f..e85679af54d 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -15,228 +15,7 @@ #define LPM_ANYPATH 0xff #define __MAX_CSSID 0 -/** - * struct cmd_scsw - command-mode subchannel status word - * @key: subchannel key - * @sctl: suspend control - * @eswf: esw format - * @cc: deferred condition code - * @fmt: format - * @pfch: prefetch - * @isic: initial-status interruption control - * @alcc: address-limit checking control - * @ssi: suppress-suspended interruption - * @zcc: zero condition code - * @ectl: extended control - * @pno: path not operational - * @res: reserved - * @fctl: function control - * @actl: activity control - * @stctl: status control - * @cpa: channel program address - * @dstat: device status - * @cstat: subchannel status - * @count: residual count - */ -struct cmd_scsw { - __u32 key : 4; - __u32 sctl : 1; - __u32 eswf : 1; - __u32 cc : 2; - __u32 fmt : 1; - __u32 pfch : 1; - __u32 isic : 1; - __u32 alcc : 1; - __u32 ssi : 1; - __u32 zcc : 1; - __u32 ectl : 1; - __u32 pno : 1; - __u32 res : 1; - __u32 fctl : 3; - __u32 actl : 7; - __u32 
stctl : 5; - __u32 cpa; - __u32 dstat : 8; - __u32 cstat : 8; - __u32 count : 16; -} __attribute__ ((packed)); - -/** - * struct tm_scsw - transport-mode subchannel status word - * @key: subchannel key - * @eswf: esw format - * @cc: deferred condition code - * @fmt: format - * @x: IRB-format control - * @q: interrogate-complete - * @ectl: extended control - * @pno: path not operational - * @fctl: function control - * @actl: activity control - * @stctl: status control - * @tcw: TCW address - * @dstat: device status - * @cstat: subchannel status - * @fcxs: FCX status - * @schxs: subchannel-extended status - */ -struct tm_scsw { - u32 key:4; - u32 :1; - u32 eswf:1; - u32 cc:2; - u32 fmt:3; - u32 x:1; - u32 q:1; - u32 :1; - u32 ectl:1; - u32 pno:1; - u32 :1; - u32 fctl:3; - u32 actl:7; - u32 stctl:5; - u32 tcw; - u32 dstat:8; - u32 cstat:8; - u32 fcxs:8; - u32 schxs:8; -} __attribute__ ((packed)); - -/** - * union scsw - subchannel status word - * @cmd: command-mode SCSW - * @tm: transport-mode SCSW - */ -union scsw { - struct cmd_scsw cmd; - struct tm_scsw tm; -} __attribute__ ((packed)); - -int scsw_is_tm(union scsw *scsw); -u32 scsw_key(union scsw *scsw); -u32 scsw_eswf(union scsw *scsw); -u32 scsw_cc(union scsw *scsw); -u32 scsw_ectl(union scsw *scsw); -u32 scsw_pno(union scsw *scsw); -u32 scsw_fctl(union scsw *scsw); -u32 scsw_actl(union scsw *scsw); -u32 scsw_stctl(union scsw *scsw); -u32 scsw_dstat(union scsw *scsw); -u32 scsw_cstat(union scsw *scsw); -int scsw_is_solicited(union scsw *scsw); -int scsw_is_valid_key(union scsw *scsw); -int scsw_is_valid_eswf(union scsw *scsw); -int scsw_is_valid_cc(union scsw *scsw); -int scsw_is_valid_ectl(union scsw *scsw); -int scsw_is_valid_pno(union scsw *scsw); -int scsw_is_valid_fctl(union scsw *scsw); -int scsw_is_valid_actl(union scsw *scsw); -int scsw_is_valid_stctl(union scsw *scsw); -int scsw_is_valid_dstat(union scsw *scsw); -int scsw_is_valid_cstat(union scsw *scsw); -int scsw_cmd_is_valid_key(union scsw *scsw); -int scsw_cmd_is_valid_sctl(union scsw *scsw); -int scsw_cmd_is_valid_eswf(union scsw *scsw); -int scsw_cmd_is_valid_cc(union scsw *scsw); -int scsw_cmd_is_valid_fmt(union scsw *scsw); -int scsw_cmd_is_valid_pfch(union scsw *scsw); -int scsw_cmd_is_valid_isic(union scsw *scsw); -int scsw_cmd_is_valid_alcc(union scsw *scsw); -int scsw_cmd_is_valid_ssi(union scsw *scsw); -int scsw_cmd_is_valid_zcc(union scsw *scsw); -int scsw_cmd_is_valid_ectl(union scsw *scsw); -int scsw_cmd_is_valid_pno(union scsw *scsw); -int scsw_cmd_is_valid_fctl(union scsw *scsw); -int scsw_cmd_is_valid_actl(union scsw *scsw); -int scsw_cmd_is_valid_stctl(union scsw *scsw); -int scsw_cmd_is_valid_dstat(union scsw *scsw); -int scsw_cmd_is_valid_cstat(union scsw *scsw); -int scsw_cmd_is_solicited(union scsw *scsw); -int scsw_tm_is_valid_key(union scsw *scsw); -int scsw_tm_is_valid_eswf(union scsw *scsw); -int scsw_tm_is_valid_cc(union scsw *scsw); -int scsw_tm_is_valid_fmt(union scsw *scsw); -int scsw_tm_is_valid_x(union scsw *scsw); -int scsw_tm_is_valid_q(union scsw *scsw); -int scsw_tm_is_valid_ectl(union scsw *scsw); -int scsw_tm_is_valid_pno(union scsw *scsw); -int scsw_tm_is_valid_fctl(union scsw *scsw); -int scsw_tm_is_valid_actl(union scsw *scsw); -int scsw_tm_is_valid_stctl(union scsw *scsw); -int scsw_tm_is_valid_dstat(union scsw *scsw); -int scsw_tm_is_valid_cstat(union scsw *scsw); -int scsw_tm_is_valid_fcxs(union scsw *scsw); -int scsw_tm_is_valid_schxs(union scsw *scsw); -int scsw_tm_is_solicited(union scsw *scsw); - -#define SCSW_FCTL_CLEAR_FUNC 0x1 
-#define SCSW_FCTL_HALT_FUNC 0x2 -#define SCSW_FCTL_START_FUNC 0x4 - -#define SCSW_ACTL_SUSPENDED 0x1 -#define SCSW_ACTL_DEVACT 0x2 -#define SCSW_ACTL_SCHACT 0x4 -#define SCSW_ACTL_CLEAR_PEND 0x8 -#define SCSW_ACTL_HALT_PEND 0x10 -#define SCSW_ACTL_START_PEND 0x20 -#define SCSW_ACTL_RESUME_PEND 0x40 - -#define SCSW_STCTL_STATUS_PEND 0x1 -#define SCSW_STCTL_SEC_STATUS 0x2 -#define SCSW_STCTL_PRIM_STATUS 0x4 -#define SCSW_STCTL_INTER_STATUS 0x8 -#define SCSW_STCTL_ALERT_STATUS 0x10 - -#define DEV_STAT_ATTENTION 0x80 -#define DEV_STAT_STAT_MOD 0x40 -#define DEV_STAT_CU_END 0x20 -#define DEV_STAT_BUSY 0x10 -#define DEV_STAT_CHN_END 0x08 -#define DEV_STAT_DEV_END 0x04 -#define DEV_STAT_UNIT_CHECK 0x02 -#define DEV_STAT_UNIT_EXCEP 0x01 - -#define SCHN_STAT_PCI 0x80 -#define SCHN_STAT_INCORR_LEN 0x40 -#define SCHN_STAT_PROG_CHECK 0x20 -#define SCHN_STAT_PROT_CHECK 0x10 -#define SCHN_STAT_CHN_DATA_CHK 0x08 -#define SCHN_STAT_CHN_CTRL_CHK 0x04 -#define SCHN_STAT_INTF_CTRL_CHK 0x02 -#define SCHN_STAT_CHAIN_CHECK 0x01 - -/* - * architectured values for first sense byte - */ -#define SNS0_CMD_REJECT 0x80 -#define SNS_CMD_REJECT SNS0_CMD_REJEC -#define SNS0_INTERVENTION_REQ 0x40 -#define SNS0_BUS_OUT_CHECK 0x20 -#define SNS0_EQUIPMENT_CHECK 0x10 -#define SNS0_DATA_CHECK 0x08 -#define SNS0_OVERRUN 0x04 -#define SNS0_INCOMPL_DOMAIN 0x01 - -/* - * architectured values for second sense byte - */ -#define SNS1_PERM_ERR 0x80 -#define SNS1_INV_TRACK_FORMAT 0x40 -#define SNS1_EOC 0x20 -#define SNS1_MESSAGE_TO_OPER 0x10 -#define SNS1_NO_REC_FOUND 0x08 -#define SNS1_FILE_PROTECTED 0x04 -#define SNS1_WRITE_INHIBITED 0x02 -#define SNS1_INPRECISE_END 0x01 - -/* - * architectured values for third sense byte - */ -#define SNS2_REQ_INH_WRITE 0x80 -#define SNS2_CORRECTABLE 0x40 -#define SNS2_FIRST_LOG_ERR 0x20 -#define SNS2_ENV_DATA_PRESENT 0x10 -#define SNS2_INPRECISE_END 0x04 +#include /** * struct ccw1 - channel command word diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h new file mode 100644 index 00000000000..de389cb54d2 --- /dev/null +++ b/arch/s390/include/asm/scsw.h @@ -0,0 +1,956 @@ +/* + * Helper functions for scsw access. + * + * Copyright IBM Corp. 
2008,2009 + * Author(s): Peter Oberparleiter + */ + +#ifndef _ASM_S390_SCSW_H_ +#define _ASM_S390_SCSW_H_ + +#include +#include +#include + +/** + * struct cmd_scsw - command-mode subchannel status word + * @key: subchannel key + * @sctl: suspend control + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @pfch: prefetch + * @isic: initial-status interruption control + * @alcc: address-limit checking control + * @ssi: suppress-suspended interruption + * @zcc: zero condition code + * @ectl: extended control + * @pno: path not operational + * @res: reserved + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @cpa: channel program address + * @dstat: device status + * @cstat: subchannel status + * @count: residual count + */ +struct cmd_scsw { + __u32 key : 4; + __u32 sctl : 1; + __u32 eswf : 1; + __u32 cc : 2; + __u32 fmt : 1; + __u32 pfch : 1; + __u32 isic : 1; + __u32 alcc : 1; + __u32 ssi : 1; + __u32 zcc : 1; + __u32 ectl : 1; + __u32 pno : 1; + __u32 res : 1; + __u32 fctl : 3; + __u32 actl : 7; + __u32 stctl : 5; + __u32 cpa; + __u32 dstat : 8; + __u32 cstat : 8; + __u32 count : 16; +} __attribute__ ((packed)); + +/** + * struct tm_scsw - transport-mode subchannel status word + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @x: IRB-format control + * @q: interrogate-complete + * @ectl: extended control + * @pno: path not operational + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @tcw: TCW address + * @dstat: device status + * @cstat: subchannel status + * @fcxs: FCX status + * @schxs: subchannel-extended status + */ +struct tm_scsw { + u32 key:4; + u32 :1; + u32 eswf:1; + u32 cc:2; + u32 fmt:3; + u32 x:1; + u32 q:1; + u32 :1; + u32 ectl:1; + u32 pno:1; + u32 :1; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 tcw; + u32 dstat:8; + u32 cstat:8; + u32 fcxs:8; + u32 schxs:8; +} __attribute__ ((packed)); + +/** + * union scsw - subchannel status word + * @cmd: command-mode SCSW + * @tm: transport-mode SCSW + */ +union scsw { + struct cmd_scsw cmd; + struct tm_scsw tm; +} __attribute__ ((packed)); + +#define SCSW_FCTL_CLEAR_FUNC 0x1 +#define SCSW_FCTL_HALT_FUNC 0x2 +#define SCSW_FCTL_START_FUNC 0x4 + +#define SCSW_ACTL_SUSPENDED 0x1 +#define SCSW_ACTL_DEVACT 0x2 +#define SCSW_ACTL_SCHACT 0x4 +#define SCSW_ACTL_CLEAR_PEND 0x8 +#define SCSW_ACTL_HALT_PEND 0x10 +#define SCSW_ACTL_START_PEND 0x20 +#define SCSW_ACTL_RESUME_PEND 0x40 + +#define SCSW_STCTL_STATUS_PEND 0x1 +#define SCSW_STCTL_SEC_STATUS 0x2 +#define SCSW_STCTL_PRIM_STATUS 0x4 +#define SCSW_STCTL_INTER_STATUS 0x8 +#define SCSW_STCTL_ALERT_STATUS 0x10 + +#define DEV_STAT_ATTENTION 0x80 +#define DEV_STAT_STAT_MOD 0x40 +#define DEV_STAT_CU_END 0x20 +#define DEV_STAT_BUSY 0x10 +#define DEV_STAT_CHN_END 0x08 +#define DEV_STAT_DEV_END 0x04 +#define DEV_STAT_UNIT_CHECK 0x02 +#define DEV_STAT_UNIT_EXCEP 0x01 + +#define SCHN_STAT_PCI 0x80 +#define SCHN_STAT_INCORR_LEN 0x40 +#define SCHN_STAT_PROG_CHECK 0x20 +#define SCHN_STAT_PROT_CHECK 0x10 +#define SCHN_STAT_CHN_DATA_CHK 0x08 +#define SCHN_STAT_CHN_CTRL_CHK 0x04 +#define SCHN_STAT_INTF_CTRL_CHK 0x02 +#define SCHN_STAT_CHAIN_CHECK 0x01 + +/* + * architectured values for first sense byte + */ +#define SNS0_CMD_REJECT 0x80 +#define SNS_CMD_REJECT SNS0_CMD_REJEC +#define SNS0_INTERVENTION_REQ 0x40 +#define SNS0_BUS_OUT_CHECK 0x20 +#define SNS0_EQUIPMENT_CHECK 0x10 +#define SNS0_DATA_CHECK 0x08 +#define SNS0_OVERRUN 0x04 +#define 
SNS0_INCOMPL_DOMAIN 0x01 + +/* + * architectured values for second sense byte + */ +#define SNS1_PERM_ERR 0x80 +#define SNS1_INV_TRACK_FORMAT 0x40 +#define SNS1_EOC 0x20 +#define SNS1_MESSAGE_TO_OPER 0x10 +#define SNS1_NO_REC_FOUND 0x08 +#define SNS1_FILE_PROTECTED 0x04 +#define SNS1_WRITE_INHIBITED 0x02 +#define SNS1_INPRECISE_END 0x01 + +/* + * architectured values for third sense byte + */ +#define SNS2_REQ_INH_WRITE 0x80 +#define SNS2_CORRECTABLE 0x40 +#define SNS2_FIRST_LOG_ERR 0x20 +#define SNS2_ENV_DATA_PRESENT 0x10 +#define SNS2_INPRECISE_END 0x04 + +/** + * scsw_is_tm - check for transport mode scsw + * @scsw: pointer to scsw + * + * Return non-zero if the specified scsw is a transport mode scsw, zero + * otherwise. + */ +static inline int scsw_is_tm(union scsw *scsw) +{ + return css_general_characteristics.fcx && (scsw->tm.x == 1); +} + +/** + * scsw_key - return scsw key field + * @scsw: pointer to scsw + * + * Return the value of the key field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.key; + else + return scsw->cmd.key; +} + +/** + * scsw_eswf - return scsw eswf field + * @scsw: pointer to scsw + * + * Return the value of the eswf field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.eswf; + else + return scsw->cmd.eswf; +} + +/** + * scsw_cc - return scsw cc field + * @scsw: pointer to scsw + * + * Return the value of the cc field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cc; + else + return scsw->cmd.cc; +} + +/** + * scsw_ectl - return scsw ectl field + * @scsw: pointer to scsw + * + * Return the value of the ectl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.ectl; + else + return scsw->cmd.ectl; +} + +/** + * scsw_pno - return scsw pno field + * @scsw: pointer to scsw + * + * Return the value of the pno field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.pno; + else + return scsw->cmd.pno; +} + +/** + * scsw_fctl - return scsw fctl field + * @scsw: pointer to scsw + * + * Return the value of the fctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.fctl; + else + return scsw->cmd.fctl; +} + +/** + * scsw_actl - return scsw actl field + * @scsw: pointer to scsw + * + * Return the value of the actl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.actl; + else + return scsw->cmd.actl; +} + +/** + * scsw_stctl - return scsw stctl field + * @scsw: pointer to scsw + * + * Return the value of the stctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. 
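+ *
+ * (A minimal usage sketch, assuming a hypothetical handler: callers go
+ * through these mode-independent accessors instead of reading scsw->cmd
+ * or scsw->tm directly, e.g.
+ *	if (scsw_stctl(&irb->scsw) & SCSW_STCTL_ALERT_STATUS)
+ *		handle_alert(irb);
+ * so that scsw_is_tm() hides the command/transport mode distinction.)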
+ */ +static inline u32 scsw_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.stctl; + else + return scsw->cmd.stctl; +} + +/** + * scsw_dstat - return scsw dstat field + * @scsw: pointer to scsw + * + * Return the value of the dstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.dstat; + else + return scsw->cmd.dstat; +} + +/** + * scsw_cstat - return scsw cstat field + * @scsw: pointer to scsw + * + * Return the value of the cstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cstat; + else + return scsw->cmd.cstat; +} + +/** + * scsw_cmd_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_key(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_sctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_sctl(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_eswf(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fmt(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_pfch - check pfch field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pfch field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pfch(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_isic - check isic field validity + * @scsw: pointer to scsw + * + * Return non-zero if the isic field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_isic(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_alcc - check alcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the alcc field of the specified command mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_cmd_is_valid_alcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_ssi - check ssi field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ssi field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ssi(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_zcc - check zcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the zcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_zcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS); +} + +/** + * scsw_cmd_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ectl(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_cmd_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pno(union scsw *scsw) +{ + return (scsw->cmd.fctl != 0) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_cmd_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_dstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_cmd_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_tm_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified transport mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_tm_is_valid_key(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_tm_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_eswf(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cc(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fmt(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_x - check x field validity + * @scsw: pointer to scsw + * + * Return non-zero if the x field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_x(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_q - check q field validity + * @scsw: pointer to scsw + * + * Return non-zero if the q field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_q(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_ectl(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_tm_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_pno(union scsw *scsw) +{ + return (scsw->tm.fctl != 0) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_tm_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified transport mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_tm_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_dstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_fcxs - check fcxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fcxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fcxs(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_schxs - check schxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the schxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_schxs(union scsw *scsw) +{ + return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK | + SCHN_STAT_INTF_CTRL_CHK | + SCHN_STAT_PROT_CHECK | + SCHN_STAT_CHN_DATA_CHK)); +} + +/** + * scsw_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_actl(scsw); + else + return scsw_cmd_is_valid_actl(scsw); +} + +/** + * scsw_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cc(scsw); + else + return scsw_cmd_is_valid_cc(scsw); +} + +/** + * scsw_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cstat(scsw); + else + return scsw_cmd_is_valid_cstat(scsw); +} + +/** + * scsw_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. 
+ */ +static inline int scsw_is_valid_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_dstat(scsw); + else + return scsw_cmd_is_valid_dstat(scsw); +} + +/** + * scsw_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_ectl(scsw); + else + return scsw_cmd_is_valid_ectl(scsw); +} + +/** + * scsw_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_eswf(scsw); + else + return scsw_cmd_is_valid_eswf(scsw); +} + +/** + * scsw_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_fctl(scsw); + else + return scsw_cmd_is_valid_fctl(scsw); +} + +/** + * scsw_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_key(scsw); + else + return scsw_cmd_is_valid_key(scsw); +} + +/** + * scsw_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_pno(scsw); + else + return scsw_cmd_is_valid_pno(scsw); +} + +/** + * scsw_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_stctl(scsw); + else + return scsw_cmd_is_valid_stctl(scsw); +} + +/** + * scsw_cmd_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the command mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. 
+ */ +static inline int scsw_cmd_is_solicited(union scsw *scsw) +{ + return (scsw->cmd.cc != 0) || (scsw->cmd.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_tm_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_tm_is_solicited(union scsw *scsw) +{ + return (scsw->tm.cc != 0) || (scsw->tm.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport or command mode scsw indicates that the + * associated status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_is_solicited(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_solicited(scsw); + else + return scsw_cmd_is_solicited(scsw); +} + +#endif /* _ASM_S390_SCSW_H_ */ -- cgit v1.2.3 From 05e7ff7da78bad3edc1290ed86b4a37da72ced62 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:31 +0200 Subject: [S390] introduce get_clock_monotonic Introduce get_clock_monotonic() function which can be used to get a (fast) timestamp. Resolution is the same as for get_clock(). The only difference is that the timestamps are monotonic and don't jump backward or forward. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 14 ++++++++++++++ arch/s390/kernel/time.c | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index cc21e3e20fd..24aa1cda20a 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -90,4 +90,18 @@ unsigned long long monotonic_clock(void); extern u64 sched_clock_base_cc; +/** + * get_clock_monotonic - returns current time in clock rate units + * + * The caller must ensure that preemption is disabled. + * The clock and sched_clock_base get changed via stop_machine. + * Therefore preemption must be disabled when calling this + * function, otherwise the returned value is not guaranteed to + * be monotonic. + */ +static inline unsigned long long get_clock_monotonic(void) +{ + return get_clock_xt() - sched_clock_base_cc; +} + #endif diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c8..54e327e9af0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -60,6 +60,7 @@ #define TICK_SIZE tick u64 sched_clock_base_cc = -1; /* Force to data section. */ +EXPORT_SYMBOL_GPL(sched_clock_base_cc); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -68,7 +69,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); */ unsigned long long notrace sched_clock(void) { - return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9; + return (get_clock_monotonic() * 125) >> 9; } /* -- cgit v1.2.3 From 04efc3be767cfed0d348fd598eba8fe5f5bf5fe6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:32 +0200 Subject: [S390] convert/optimize csum_fold() to C In the meantime gcc generates better code than the old inline assemblies do. 
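The C version relies on the standard rotate-and-add trick for folding a 32-bit one's-complement sum: adding the sum to itself rotated by 16 bits leaves H+L plus the end-around carry in the upper halfword. A minimal stand-alone sketch of the same arithmetic (plain user-space types instead of the kernel's __wsum/__sum16 wrappers; fold16 is a hypothetical name):

	#include <stdint.h>

	static uint16_t fold16(uint32_t csum)
	{
		/* csum + rotl(csum, 16): upper half now holds H + L + carry */
		csum += (csum >> 16) + (csum << 16);
		/* keep the folded halfword and return its one's complement */
		return (uint16_t)~(csum >> 16);
	}
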
Original inline assembly results in: lr %r1,%r2 sr %r3,%r3 lr %r2,%r1 srdl %r2,16 alr %r2,%r3 alr %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 Out of the C code gcc generates this: rll %r1,%r2,16 ar %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 In addition we don't have any static register allocations anymore and gcc is free to shuffle instructions around for better pipeline usage. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/checksum.h | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d5a8e7c1477..6c00f6800a3 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) */ static inline __sum16 csum_fold(__wsum sum) { -#ifndef __s390x__ - register_pair rp; + u32 csum = (__force u32) sum; - asm volatile( - " slr %N1,%N1\n" /* %0 = H L */ - " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */ - " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */ - " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */ - " alr %0,%1\n" /* %0 = H+L+C L+H */ - " srl %0,16\n" /* %0 = H+L+C */ - : "+&d" (sum), "=d" (rp) : : "cc"); -#else /* __s390x__ */ - asm volatile( - " sr 3,3\n" /* %0 = H*65536 + L */ - " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */ - " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */ - " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */ - " alr %0,2\n" /* %0 = H+L+C L+H */ - " srl %0,16\n" /* %0 = H+L+C */ - : "+&d" (sum) : : "cc", "2", "3"); -#endif /* __s390x__ */ - return (__force __sum16) ~sum; + csum += (csum >> 16) + (csum << 16); + csum >>= 16; + return (__force __sum16) ~csum; } /* -- cgit v1.2.3 From 6ac2a4ddd10d6916785b4c566d521025c855f823 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 11 Sep 2009 10:28:33 +0200 Subject: [S390] improve mcount code Move the 64 bit mount code from mcount.S into mcount64.S and avoid code duplication. 
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/mcount.S | 147 +++----------------------------------------- arch/s390/kernel/mcount64.S | 78 +++++++++++++++++++++++ 3 files changed, 89 insertions(+), 138 deletions(-) create mode 100644 arch/s390/kernel/mcount64.S (limited to 'arch') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index c75ed43b1a1..794955305f3 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o +obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 2a0a5e97ba8..dfe015d7398 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -11,111 +11,27 @@ ftrace_stub: br %r14 -#ifdef CONFIG_64BIT - -#ifdef CONFIG_DYNAMIC_FTRACE - .globl _mcount _mcount: - br %r14 - - .globl ftrace_caller -ftrace_caller: - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - larl %r14,ftrace_dyn_func - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_caller -ftrace_graph_caller: - # This unconditional branch gets runtime patched. Change only if - # you know what you are doing. See ftrace_enable_graph_caller(). - j 0f - lg %r2,272(%r15) - lg %r3,168(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) -0: -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) +#ifdef CONFIG_DYNAMIC_FTRACE br %r14 .data .globl ftrace_dyn_func ftrace_dyn_func: - .quad ftrace_stub + .long ftrace_stub .previous -#else /* CONFIG_DYNAMIC_FTRACE */ - - .globl _mcount -_mcount: - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - larl %r14,ftrace_trace_function - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - lg %r2,272(%r15) - lg %r3,168(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) - br %r14 - -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - - .globl return_to_handler -return_to_handler: - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,160 - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#else /* CONFIG_64BIT */ - -#ifdef CONFIG_DYNAMIC_FTRACE - - .globl _mcount -_mcount: - br %r14 - .globl ftrace_caller ftrace_caller: +#endif stm %r2,%r5,16(%r15) bras %r1,2f +#ifdef CONFIG_DYNAMIC_FTRACE +0: .long ftrace_dyn_func +#else 0: .long ftrace_trace_function +#endif 1: .long function_trace_stop 2: l %r2,1b-0b(%r1) icm %r2,0xf,0(%r2) @@ -131,53 +47,13 @@ ftrace_caller: l %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE .globl ftrace_graph_caller ftrace_graph_caller: # This unconditional branch gets runtime patched. Change only if # you know what you are doing. See ftrace_enable_graph_caller(). 
j 1f - bras %r1,0f - .long prepare_ftrace_return -0: l %r2,152(%r15) - l %r4,0(%r1) - l %r3,100(%r15) - basr %r14,%r4 - st %r2,100(%r15) -1: #endif - ahi %r15,96 - l %r14,56(%r15) -3: lm %r2,%r5,16(%r15) - br %r14 - - .data - .globl ftrace_dyn_func -ftrace_dyn_func: - .long ftrace_stub - .previous - -#else /* CONFIG_DYNAMIC_FTRACE */ - - .globl _mcount -_mcount: - stm %r2,%r5,16(%r15) - bras %r1,2f -0: .long ftrace_trace_function -1: .long function_trace_stop -2: l %r2,1b-0b(%r1) - icm %r2,0xf,0(%r2) - jnz 3f - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER bras %r1,0f .long prepare_ftrace_return 0: l %r2,152(%r15) @@ -185,14 +61,13 @@ _mcount: l %r3,100(%r15) basr %r14,%r4 st %r2,100(%r15) +1: #endif ahi %r15,96 l %r14,56(%r15) 3: lm %r2,%r5,16(%r15) br %r14 -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl return_to_handler @@ -211,6 +86,4 @@ return_to_handler: lm %r2,%r5,16(%r15) br %r14 -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#endif /* CONFIG_64BIT */ +#endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S new file mode 100644 index 00000000000..c37211c6092 --- /dev/null +++ b/arch/s390/kernel/mcount64.S @@ -0,0 +1,78 @@ +/* + * Copyright IBM Corp. 2008,2009 + * + * Author(s): Heiko Carstens , + * + */ + +#include + + .globl ftrace_stub +ftrace_stub: + br %r14 + + .globl _mcount +_mcount: +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + + .data + .globl ftrace_dyn_func +ftrace_dyn_func: + .quad ftrace_stub + .previous + + .globl ftrace_caller +ftrace_caller: +#endif + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) +#ifdef CONFIG_DYNAMIC_FTRACE + larl %r14,ftrace_dyn_func +#else + larl %r14,ftrace_trace_function +#endif + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE + .globl ftrace_graph_caller +ftrace_graph_caller: + # This unconditional branch gets runtime patched. Change only if + # you know what you are doing. See ftrace_enable_graph_caller(). + j 0f +#endif + lg %r2,272(%r15) + lg %r3,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +0: +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + + .globl return_to_handler +return_to_handler: + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif -- cgit v1.2.3 From 12751058515860ed43c8f874ebcb2097b323736a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:34 +0200 Subject: [S390] atomic ops: add effecient atomic64 support for 31 bit Use compare double and swap to implement efficient atomic64 ops for 31 bit. 
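All read-modify-write ops in the 31 bit variant follow the same compare-and-swap retry loop: read the old 64 bit value, compute the new one in C, and retry until the cds based atomic64_cmpxchg() observes no concurrent update. A hedged sketch of the generic shape (atomic64_rmw and op are illustrative names, not part of the patch):

	static long long atomic64_rmw(atomic64_t *v, long long i,
				      long long (*op)(long long, long long))
	{
		long long old, new;

		do {
			old = atomic64_read(v);	/* paired load, lm in the patch */
			new = op(old, i);
		} while (atomic64_cmpxchg(v, old, new) != old);	/* cds */
		return new;
	}

atomic64_add_return(), atomic64_sub_return() and the mask helpers in the patch below are open-coded instances of this loop.
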
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 - arch/s390/include/asm/atomic.h | 164 +++++++++++++++++++++++++++++++---------- 2 files changed, 127 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2ae5d72f47e..47836b945d0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,7 +95,6 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_COUNTERS - select GENERIC_ATOMIC64 if !64BIT config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index c7d0abfb0f0..b491d5e963c 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -1,28 +1,20 @@ #ifndef __ARCH_S390_ATOMIC__ #define __ARCH_S390_ATOMIC__ -#include -#include - /* - * include/asm-s390/atomic.h + * Copyright 1999,2009 IBM Corp. + * Author(s): Martin Schwidefsky , + * Denis Joseph Barrow, + * Arnd Bergmann , * - * S390 version - * Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow, - * Arnd Bergmann (arndb@de.ibm.com) - * - * Derived from "include/asm-i386/bitops.h" - * Copyright (C) 1992, Linus Torvalds + * Atomic operations that C can't guarantee us. + * Useful for resource counting etc. + * s390 uses 'Compare And Swap' for atomicity in SMP enviroment. * */ -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - * S390 uses 'Compare And Swap' for atomicity in SMP enviroment - */ +#include +#include #define ATOMIC_INIT(i) { (i) } @@ -146,9 +138,10 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #undef __CS_LOOP -#ifdef __s390x__ #define ATOMIC64_INIT(i) { (i) } +#ifdef CONFIG_64BIT + #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CSG_LOOP(ptr, op_val, op_string) ({ \ @@ -202,21 +195,11 @@ static __inline__ long long atomic64_add_return(long long i, atomic64_t * v) { return __CSG_LOOP(v, i, "agr"); } -#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) -#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) -#define atomic64_inc(_v) atomic64_add_return(1, _v) -#define atomic64_inc_return(_v) atomic64_add_return(1, _v) -#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v) { return __CSG_LOOP(v, i, "sgr"); } -#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) -#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) -#define atomic64_dec(_v) atomic64_sub_return(1, _v) -#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) -#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v) { @@ -249,6 +232,111 @@ static __inline__ long long atomic64_cmpxchg(atomic64_t *v, return old; } +#undef __CSG_LOOP + +#else /* CONFIG_64BIT */ + +typedef struct { + long long counter; +} atomic64_t; + +static inline long long atomic64_read(const atomic64_t *v) +{ + register_pair rp; + + asm volatile( + " lm %0,%N0,0(%1)" + : "=&d" (rp) + : "a" (&v->counter), "m" (v->counter) + ); + return rp.pair; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + register_pair rp = {.pair = i}; + + asm volatile( + " stm %1,%N1,0(%2)" + : "=m" (v->counter) + : "d" (rp), "a" (&v->counter) + ); +} + +static inline long long 
atomic64_xchg(atomic64_t *v, long long new) +{ + register_pair rp_new = {.pair = new}; + register_pair rp_old; + + asm volatile( + " lm %0,%N0,0(%2)\n" + "0: cds %0,%3,0(%2)\n" + " jl 0b\n" + : "=&d" (rp_old), "+m" (v->counter) + : "a" (&v->counter), "d" (rp_new) + : "cc"); + return rp_old.pair; +} + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%3,0(%2)" + : "+&d" (rp_old), "+m" (v->counter) + : "a" (&v->counter), "d" (rp_new) + : "cc"); + return rp_old.pair; +} + + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old + i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old - i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old | mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old & mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +#endif /* CONFIG_64BIT */ + static __inline__ int atomic64_add_unless(atomic64_t *v, long long a, long long u) { @@ -265,15 +353,17 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, return c != u; } -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) - -#undef __CSG_LOOP - -#else /* __s390x__ */ - -#include - -#endif /* __s390x__ */ +#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) +#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) +#define atomic64_inc(_v) atomic64_add_return(1, _v) +#define atomic64_inc_return(_v) atomic64_add_return(1, _v) +#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) +#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) +#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) +#define atomic64_dec(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() -- cgit v1.2.3 From bfe3349b516df011dcf6462b0fd748a6f5c2e8af Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:35 +0200 Subject: [S390] atomic ops: small cleanups Couple of coding style fixes, replace __inline__ with inline and remove #ifdef __KERNEL_- since the header file isn't exported. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/atomic.h | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index b491d5e963c..ae7c8f9f94a 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -18,8 +18,6 @@ #define ATOMIC_INIT(i) { (i) } -#ifdef __KERNEL__ - #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CS_LOOP(ptr, op_val, op_string) ({ \ @@ -69,7 +67,7 @@ static inline void atomic_set(atomic_t *v, int i) barrier(); } -static __inline__ int atomic_add_return(int i, atomic_t * v) +static inline int atomic_add_return(int i, atomic_t *v) { return __CS_LOOP(v, i, "ar"); } @@ -79,7 +77,7 @@ static __inline__ int atomic_add_return(int i, atomic_t * v) #define atomic_inc_return(_v) atomic_add_return(1, _v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) -static __inline__ int atomic_sub_return(int i, atomic_t * v) +static inline int atomic_sub_return(int i, atomic_t *v) { return __CS_LOOP(v, i, "sr"); } @@ -89,19 +87,19 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v) +static inline void atomic_clear_mask(unsigned long mask, atomic_t *v) { - __CS_LOOP(v, ~mask, "nr"); + __CS_LOOP(v, ~mask, "nr"); } -static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v) +static inline void atomic_set_mask(unsigned long mask, atomic_t *v) { - __CS_LOOP(v, mask, "or"); + __CS_LOOP(v, mask, "or"); } #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) -static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new) +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) asm volatile( @@ -119,7 +117,7 @@ static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new) return old; } -static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) +static inline int atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); @@ -155,7 +153,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) : "=&d" (old_val), "=&d" (new_val), \ "=Q" (((atomic_t *)(ptr))->counter) \ : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ - : "cc", "memory" ); \ + : "cc", "memory"); \ new_val; \ }) @@ -173,7 +171,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) "=m" (((atomic_t *)(ptr))->counter) \ : "a" (ptr), "d" (op_val), \ "m" (((atomic_t *)(ptr))->counter) \ - : "cc", "memory" ); \ + : "cc", "memory"); \ new_val; \ }) @@ -191,29 +189,29 @@ static inline void atomic64_set(atomic64_t *v, long long i) barrier(); } -static __inline__ long long atomic64_add_return(long long i, atomic64_t * v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { return __CSG_LOOP(v, i, "agr"); } -static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v) +static inline long long atomic64_sub_return(long long i, atomic64_t *v) { return __CSG_LOOP(v, i, "sgr"); } -static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v) +static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, ~mask, "ngr"); + __CSG_LOOP(v, ~mask, "ngr"); } -static __inline__ void 
atomic64_set_mask(unsigned long mask, atomic64_t * v) +static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, mask, "ogr"); + __CSG_LOOP(v, mask, "ogr"); } #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -static __inline__ long long atomic64_cmpxchg(atomic64_t *v, +static inline long long atomic64_cmpxchg(atomic64_t *v, long long old, long long new) { #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) @@ -337,8 +335,7 @@ static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) #endif /* CONFIG_64BIT */ -static __inline__ int atomic64_add_unless(atomic64_t *v, - long long a, long long u) +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) { long long c, old; c = atomic64_read(v); @@ -371,5 +368,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, #define smp_mb__after_atomic_inc() smp_mb() #include -#endif /* __KERNEL__ */ + #endif /* __ARCH_S390_ATOMIC__ */ -- cgit v1.2.3 From f3d1263e81daf2be1353baf1ad3f6e25d135f2c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:36 +0200 Subject: [S390] hibernation: remove dead file There is no caller of do_after_copyback() anywhere. Remove it. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/power/Makefile | 1 - arch/s390/power/swsusp_64.c | 17 ----------------- 2 files changed, 18 deletions(-) delete mode 100644 arch/s390/power/swsusp_64.c (limited to 'arch') diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile index 973bb45a8fe..ee2f279beff 100644 --- a/arch/s390/power/Makefile +++ b/arch/s390/power/Makefile @@ -4,5 +4,4 @@ obj-$(CONFIG_HIBERNATION) += suspend.o obj-$(CONFIG_HIBERNATION) += swsusp.o -obj-$(CONFIG_HIBERNATION) += swsusp_64.o obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o diff --git a/arch/s390/power/swsusp_64.c b/arch/s390/power/swsusp_64.c deleted file mode 100644 index 9516a517d72..00000000000 --- a/arch/s390/power/swsusp_64.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Support for suspend and resume on s390 - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht - * - */ - -#include -#include - -void do_after_copyback(void) -{ - mb(); -} - -- cgit v1.2.3 From c48ff644f2c86f34f69f382b68b16c6d30854783 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:28:37 +0200 Subject: [S390] hibernation: merge files and move to kernel/ Merge the nearly empty C files and move everything from power/ to kernel/. That way the files are easier to handle. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 3 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/suspend.c | 73 ++++++++++++++++ arch/s390/kernel/swsusp_asm64.S | 184 ++++++++++++++++++++++++++++++++++++++++ arch/s390/power/Makefile | 7 -- arch/s390/power/suspend.c | 40 --------- arch/s390/power/swsusp.c | 42 --------- arch/s390/power/swsusp_asm64.S | 184 ---------------------------------------- 8 files changed, 259 insertions(+), 276 deletions(-) create mode 100644 arch/s390/kernel/suspend.c create mode 100644 arch/s390/kernel/swsusp_asm64.S delete mode 100644 arch/s390/power/Makefile delete mode 100644 arch/s390/power/suspend.c delete mode 100644 arch/s390/power/swsusp.c delete mode 100644 arch/s390/power/swsusp_asm64.S (limited to 'arch') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0ff387cebf8..fc8fb20e7fc 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -88,8 +88,7 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ \ - arch/s390/power/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 794955305f3..c7be8e10b87 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -32,7 +32,7 @@ extra-y += head.o init_task.o vmlinux.lds obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o topology.o - +obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c new file mode 100644 index 00000000000..086bee970ca --- /dev/null +++ b/arch/s390/kernel/suspend.c @@ -0,0 +1,73 @@ +/* + * Suspend support specific for s390. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * check if given pfn is in the 'nosave' or in the read only NSS section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) + >> PAGE_SHIFT; + unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; + unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); + + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + if (pfn >= stext_pfn && pfn <= eshared_pfn) { + if (ipl_info.type == IPL_TYPE_NSS) + return 1; + } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0)) + return 1; + return 0; +} + +void save_processor_state(void) +{ + /* swsusp_arch_suspend() actually saves all cpu register contents. + * Machine checks must be disabled since swsusp_arch_suspend() stores + * register contents to their lowcore save areas. That's the same + * place where register contents on machine checks would be saved. + * To avoid register corruption disable machine checks. + * We must also disable machine checks in the new psw mask for + * program checks, since swsusp_arch_suspend() may generate program + * checks. Disabling machine checks for all other new psw masks is + * just paranoia. 
+ */ + local_mcck_disable(); + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; +} + +void restore_processor_state(void) +{ + S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; + /* Enable lowcore protection */ + __ctl_set_bit(0,28); + local_mcck_enable(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S new file mode 100644 index 00000000000..7cd6b096f0d --- /dev/null +++ b/arch/s390/kernel/swsusp_asm64.S @@ -0,0 +1,184 @@ +/* + * S390 64-bit swsusp implementation + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht + * Michael Holzheu + */ + +#include +#include +#include + +/* + * Save register context in absolute 0 lowcore and call swsusp_save() to + * create in-memory kernel image. The context is saved in the designated + * "store status" memory locations (see POP). + * We return from this function twice. The first time during the suspend to + * disk process. The second time via the swsusp_arch_resume() function + * (see below) in the resume process. + * This function runs with disabled interrupts. + */ + .section .text + .align 4 + .globl swsusp_arch_suspend +swsusp_arch_suspend: + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Store prefix register on stack */ + stpx __SF_EMPTY(%r15) + + /* Save prefix register contents for lowcore */ + llgf %r4,__SF_EMPTY(%r15) + + /* Get pointer to save area */ + lghi %r1,0x1000 + + /* Store registers */ + mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ + stfpc 0x31c(%r1) /* store fpu control */ + std 0,0x200(%r1) /* store f0 */ + std 1,0x208(%r1) /* store f1 */ + std 2,0x210(%r1) /* store f2 */ + std 3,0x218(%r1) /* store f3 */ + std 4,0x220(%r1) /* store f4 */ + std 5,0x228(%r1) /* store f5 */ + std 6,0x230(%r1) /* store f6 */ + std 7,0x238(%r1) /* store f7 */ + std 8,0x240(%r1) /* store f8 */ + std 9,0x248(%r1) /* store f9 */ + std 10,0x250(%r1) /* store f10 */ + std 11,0x258(%r1) /* store f11 */ + std 12,0x260(%r1) /* store f12 */ + std 13,0x268(%r1) /* store f13 */ + std 14,0x270(%r1) /* store f14 */ + std 15,0x278(%r1) /* store f15 */ + stam %a0,%a15,0x340(%r1) /* store access registers */ + stctg %c0,%c15,0x380(%r1) /* store control registers */ + stmg %r0,%r15,0x280(%r1) /* store general registers */ + + stpt 0x328(%r1) /* store timer */ + stckc 0x330(%r1) /* store clock comparator */ + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + lghi %r2,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Save image */ + brasl %r14,swsusp_save + + /* Restore prefix register and return */ + lghi %r1,0x1000 + spx 0x318(%r1) + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + +/* + * Restore saved memory image to correct place and restore register context. + * Then we return to the function that called swsusp_arch_suspend(). + * swsusp_arch_resume() runs with disabled interrupts. 
+ */ + .globl swsusp_arch_resume +swsusp_arch_resume: + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + +#ifdef CONFIG_SMP + /* Save boot cpu number */ + brasl %r14,smp_get_phys_cpu_id + lgr %r10,%r2 +#endif + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Restore saved image */ + larl %r1,restore_pblist + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 2f +0: + lg %r2,8(%r1) + lg %r4,0(%r1) + lghi %r3,PAGE_SIZE + lghi %r5,PAGE_SIZE +1: + mvcle %r2,%r4,0 + jo 1b + lg %r1,16(%r1) + ltgr %r1,%r1 + jnz 0b +2: + ptlb /* flush tlb */ + + /* Restore registers */ + lghi %r13,0x1000 /* %r1 = pointer to save arae */ + + spt 0x328(%r13) /* reprogram timer */ + //sckc 0x330(%r13) /* set clock comparator */ + + lctlg %c0,%c15,0x380(%r13) /* load control registers */ + lam %a0,%a15,0x340(%r13) /* load access registers */ + + lfpc 0x31c(%r13) /* load fpu control */ + ld 0,0x200(%r13) /* load f0 */ + ld 1,0x208(%r13) /* load f1 */ + ld 2,0x210(%r13) /* load f2 */ + ld 3,0x218(%r13) /* load f3 */ + ld 4,0x220(%r13) /* load f4 */ + ld 5,0x228(%r13) /* load f5 */ + ld 6,0x230(%r13) /* load f6 */ + ld 7,0x238(%r13) /* load f7 */ + ld 8,0x240(%r13) /* load f8 */ + ld 9,0x248(%r13) /* load f9 */ + ld 10,0x250(%r13) /* load f10 */ + ld 11,0x258(%r13) /* load f11 */ + ld 12,0x260(%r13) /* load f12 */ + ld 13,0x268(%r13) /* load f13 */ + ld 14,0x270(%r13) /* load f14 */ + ld 15,0x278(%r13) /* load f15 */ + + /* Load old stack */ + lg %r15,0x2f8(%r13) + + /* Pointer to save area */ + lghi %r13,0x1000 + +#ifdef CONFIG_SMP + /* Switch CPUs */ + lgr %r2,%r10 /* get cpu id */ + llgf %r3,0x318(%r13) + brasl %r14,smp_switch_boot_cpu_in_resume +#endif + /* Restore prefix register */ + spx 0x318(%r13) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Return 0 */ + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile deleted file mode 100644 index ee2f279beff..00000000000 --- a/arch/s390/power/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for s390 PM support -# - -obj-$(CONFIG_HIBERNATION) += suspend.o -obj-$(CONFIG_HIBERNATION) += swsusp.o -obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o diff --git a/arch/s390/power/suspend.c b/arch/s390/power/suspend.c deleted file mode 100644 index b3351eceebb..00000000000 --- a/arch/s390/power/suspend.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Suspend support specific for s390. - * - * Copyright IBM Corp. 
2009 - * - * Author(s): Hans-Joachim Picht - */ - -#include -#include -#include -#include -#include -#include - -/* - * References to section boundaries - */ -extern const void __nosave_begin, __nosave_end; - -/* - * check if given pfn is in the 'nosave' or in the read only NSS section - */ -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) - >> PAGE_SHIFT; - unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; - unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); - - if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) - return 1; - if (pfn >= stext_pfn && pfn <= eshared_pfn) { - if (ipl_info.type == IPL_TYPE_NSS) - return 1; - } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0)) - return 1; - return 0; -} diff --git a/arch/s390/power/swsusp.c b/arch/s390/power/swsusp.c deleted file mode 100644 index bd1f5c6b0b8..00000000000 --- a/arch/s390/power/swsusp.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Support for suspend and resume on s390 - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht - * - */ - -#include - -void save_processor_state(void) -{ - /* swsusp_arch_suspend() actually saves all cpu register contents. - * Machine checks must be disabled since swsusp_arch_suspend() stores - * register contents to their lowcore save areas. That's the same - * place where register contents on machine checks would be saved. - * To avoid register corruption disable machine checks. - * We must also disable machine checks in the new psw mask for - * program checks, since swsusp_arch_suspend() may generate program - * checks. Disabling machine checks for all other new psw masks is - * just paranoia. - */ - local_mcck_disable(); - /* Disable lowcore protection */ - __ctl_clear_bit(0,28); - S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; -} - -void restore_processor_state(void) -{ - S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; - /* Enable lowcore protection */ - __ctl_set_bit(0,28); - local_mcck_enable(); -} diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/power/swsusp_asm64.S deleted file mode 100644 index b26df5c5933..00000000000 --- a/arch/s390/power/swsusp_asm64.S +++ /dev/null @@ -1,184 +0,0 @@ -/* - * S390 64-bit swsusp implementation - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht - * Michael Holzheu - */ - -#include -#include -#include - -/* - * Save register context in absolute 0 lowcore and call swsusp_save() to - * create in-memory kernel image. The context is saved in the designated - * "store status" memory locations (see POP). - * We return from this function twice. The first time during the suspend to - * disk process. The second time via the swsusp_arch_resume() function - * (see below) in the resume process. - * This function runs with disabled interrupts. 
- */ - .section .text - .align 2 - .globl swsusp_arch_suspend -swsusp_arch_suspend: - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Store prefix register on stack */ - stpx __SF_EMPTY(%r15) - - /* Save prefix register contents for lowcore */ - llgf %r4,__SF_EMPTY(%r15) - - /* Get pointer to save area */ - lghi %r1,0x1000 - - /* Store registers */ - mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ - stfpc 0x31c(%r1) /* store fpu control */ - std 0,0x200(%r1) /* store f0 */ - std 1,0x208(%r1) /* store f1 */ - std 2,0x210(%r1) /* store f2 */ - std 3,0x218(%r1) /* store f3 */ - std 4,0x220(%r1) /* store f4 */ - std 5,0x228(%r1) /* store f5 */ - std 6,0x230(%r1) /* store f6 */ - std 7,0x238(%r1) /* store f7 */ - std 8,0x240(%r1) /* store f8 */ - std 9,0x248(%r1) /* store f9 */ - std 10,0x250(%r1) /* store f10 */ - std 11,0x258(%r1) /* store f11 */ - std 12,0x260(%r1) /* store f12 */ - std 13,0x268(%r1) /* store f13 */ - std 14,0x270(%r1) /* store f14 */ - std 15,0x278(%r1) /* store f15 */ - stam %a0,%a15,0x340(%r1) /* store access registers */ - stctg %c0,%c15,0x380(%r1) /* store control registers */ - stmg %r0,%r15,0x280(%r1) /* store general registers */ - - stpt 0x328(%r1) /* store timer */ - stckc 0x330(%r1) /* store clock comparator */ - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - lghi %r2,0 - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Save image */ - brasl %r14,swsusp_save - - /* Restore prefix register and return */ - lghi %r1,0x1000 - spx 0x318(%r1) - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - br %r14 - -/* - * Restore saved memory image to correct place and restore register context. - * Then we return to the function that called swsusp_arch_suspend(). - * swsusp_arch_resume() runs with disabled interrupts. 
- */ - .globl swsusp_arch_resume -swsusp_arch_resume: - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - -#ifdef CONFIG_SMP - /* Save boot cpu number */ - brasl %r14,smp_get_phys_cpu_id - lgr %r10,%r2 -#endif - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - /* Restore saved image */ - larl %r1,restore_pblist - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 2f -0: - lg %r2,8(%r1) - lg %r4,0(%r1) - lghi %r3,PAGE_SIZE - lghi %r5,PAGE_SIZE -1: - mvcle %r2,%r4,0 - jo 1b - lg %r1,16(%r1) - ltgr %r1,%r1 - jnz 0b -2: - ptlb /* flush tlb */ - - /* Restore registers */ - lghi %r13,0x1000 /* %r1 = pointer to save arae */ - - spt 0x328(%r13) /* reprogram timer */ - //sckc 0x330(%r13) /* set clock comparator */ - - lctlg %c0,%c15,0x380(%r13) /* load control registers */ - lam %a0,%a15,0x340(%r13) /* load access registers */ - - lfpc 0x31c(%r13) /* load fpu control */ - ld 0,0x200(%r13) /* load f0 */ - ld 1,0x208(%r13) /* load f1 */ - ld 2,0x210(%r13) /* load f2 */ - ld 3,0x218(%r13) /* load f3 */ - ld 4,0x220(%r13) /* load f4 */ - ld 5,0x228(%r13) /* load f5 */ - ld 6,0x230(%r13) /* load f6 */ - ld 7,0x238(%r13) /* load f7 */ - ld 8,0x240(%r13) /* load f8 */ - ld 9,0x248(%r13) /* load f9 */ - ld 10,0x250(%r13) /* load f10 */ - ld 11,0x258(%r13) /* load f11 */ - ld 12,0x260(%r13) /* load f12 */ - ld 13,0x268(%r13) /* load f13 */ - ld 14,0x270(%r13) /* load f14 */ - ld 15,0x278(%r13) /* load f15 */ - - /* Load old stack */ - lg %r15,0x2f8(%r13) - - /* Pointer to save area */ - lghi %r13,0x1000 - -#ifdef CONFIG_SMP - /* Switch CPUs */ - lgr %r2,%r10 /* get cpu id */ - llgf %r3,0x318(%r13) - brasl %r14,smp_switch_boot_cpu_in_resume -#endif - /* Restore prefix register */ - spx 0x318(%r13) - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Return 0 */ - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - br %r14 -- cgit v1.2.3 From 684d2fd48e718e70dad21ef7c528649578147e48 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 11 Sep 2009 10:28:40 +0200 Subject: [S390] kernel: Append scpdata to kernel boot command line Append scpdata to the kernel boot command line. If scpdata starts with the equal sign (=), the kernel boot command line is replaced. (For consistency with zIPL and IPL PARM parameters.) To use scpdata for the kernel boot command line, scpdata must consist of ascii characters only. If scpdata contains other characters, scpdata is not appended to the kernel boot command line. In addition, re-IPL is extended for setting scpdata for the next Linux reboot. 
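To make the convention concrete, here is a minimal user-space sketch of the replace-vs-append rule described above (illustrative only; apply_scpdata and CMDLINE_SIZE are made-up names, not the kernel code, which lives in append_to_cmdline() in the diff below):

	#include <stdio.h>
	#include <string.h>

	#define CMDLINE_SIZE 4096	/* mirrors the new COMMAND_LINE_SIZE */

	/* Hedged sketch of the scpdata convention: a leading '=' replaces
	 * the boot command line, anything else is appended after a blank. */
	static void apply_scpdata(char *cmdline, const char *scpdata)
	{
		size_t len = strlen(cmdline);

		if (scpdata[0] == '=')		/* replace, dropping the '=' */
			snprintf(cmdline, CMDLINE_SIZE, "%s", scpdata + 1);
		else if (scpdata[0] != '\0')	/* append after a blank */
			snprintf(cmdline + len, CMDLINE_SIZE - len, " %s", scpdata);
	}

	int main(void)
	{
		char cmdline[CMDLINE_SIZE] = "root=/dev/dasda1";

		apply_scpdata(cmdline, "dasd=0.0.0200");
		puts(cmdline);	/* root=/dev/dasda1 dasd=0.0.0200 */
		apply_scpdata(cmdline, "=console=ttyS0");
		puts(cmdline);	/* console=ttyS0 */
		return 0;
	}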
Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ipl.h | 5 +- arch/s390/include/asm/setup.h | 2 +- arch/s390/kernel/early.c | 35 +++++++--- arch/s390/kernel/ipl.c | 157 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 172 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 1171e6d144a..5e95d95450b 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -57,6 +57,8 @@ struct ipl_block_fcp { } __attribute__((packed)); #define DIAG308_VMPARM_SIZE 64 +#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \ + offsetof(struct ipl_block_fcp, scp_data))) struct ipl_block_ccw { u8 load_parm[8]; @@ -91,7 +93,8 @@ extern void do_halt(void); extern void do_poff(void); extern void ipl_save_parameters(void); extern void ipl_update_parameters(void); -extern void get_ipl_vmparm(char *); +extern size_t append_ipl_vmparm(char *, size_t); +extern size_t append_ipl_scpdata(char *, size_t); enum { IPL_DEVNO_VALID = 1, diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 38b0fc221ed..e37478e8728 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,7 +8,7 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H -#define COMMAND_LINE_SIZE 1024 +#define COMMAND_LINE_SIZE 4096 #define ARCH_COMMAND_LINE_SIZE 896 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cae14c49951..21f3799fe27 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -81,6 +81,8 @@ asm( " br 14\n" " .size savesys_ipl_nss, .-savesys_ipl_nss\n"); +static __initdata char upper_command_line[COMMAND_LINE_SIZE]; + static noinline __init void create_kernel_nss(void) { unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; @@ -90,7 +92,6 @@ static noinline __init void create_kernel_nss(void) int response; size_t len; char *savesys_ptr; - char upper_command_line[COMMAND_LINE_SIZE]; char defsys_cmd[DEFSYS_CMD_SIZE]; char savesys_cmd[SAVESYS_CMD_SIZE]; @@ -367,21 +368,35 @@ static __init void rescue_initrd(void) } /* Set up boot command line */ -static void __init setup_boot_command_line(void) +static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) { - char *parm = NULL; + char *parm, *delim; + size_t rc, len; + + len = strlen(boot_command_line); + delim = boot_command_line + len; /* '\0' character position */ + parm = boot_command_line + len + 1; /* append right after '\0' */ + + rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); + if (rc) { + if (*parm == '=') + memmove(boot_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +static void __init setup_boot_command_line(void) +{ /* copy arch command line */ strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* append IPL PARM data to the boot command line */ - if (MACHINE_IS_VM) { - parm = boot_command_line + strlen(boot_command_line); - *parm++ = ' '; - get_ipl_vmparm(parm); - if (parm[0] == '=') - memmove(boot_command_line, parm + 1, strlen(parm)); - } + if (MACHINE_IS_VM) + append_to_cmdline(append_ipl_vmparm); + + append_to_cmdline(append_ipl_scpdata); } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 371a2d88f4a..04451a5e3c1 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -272,17 +272,18 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = 
__ATTR_RO(ipl_type); /* VM IPL PARM routines */ -static void reipl_get_ascii_vmparm(char *dest, +size_t reipl_get_ascii_vmparm(char *dest, size_t size, const struct ipl_parameter_block *ipb) { int i; - int len = 0; + size_t len; char has_lowercase = 0; + len = 0; if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && (ipb->ipl_info.ccw.vm_parm_len > 0)) { - len = ipb->ipl_info.ccw.vm_parm_len; + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); /* If at least one character is lowercase, we assume mixed * case; otherwise we convert everything to lowercase. @@ -299,14 +300,20 @@ static void reipl_get_ascii_vmparm(char *dest, EBCASC(dest, len); } dest[len] = 0; + + return len; } -void get_ipl_vmparm(char *dest) +size_t append_ipl_vmparm(char *dest, size_t size) { + size_t rc; + + rc = 0; if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) - reipl_get_ascii_vmparm(dest, &ipl_block); + rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); else dest[0] = 0; + return rc; } static ssize_t ipl_vm_parm_show(struct kobject *kobj, @@ -314,10 +321,56 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj, { char parm[DIAG308_VMPARM_SIZE + 1] = {}; - get_ipl_vmparm(parm); + append_ipl_vmparm(parm, sizeof(parm)); return sprintf(page, "%s\n", parm); } +static size_t scpdata_length(const char* buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + for (i = 0; i < count; i++) + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); +out: + dest[count] = '\0'; + return count; +} + +size_t append_ipl_scpdata(char *dest, size_t len) +{ + size_t rc; + + rc = 0; + if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) + rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); + else + dest[0] = 0; + return rc; +} + + static struct kobj_attribute sys_ipl_vm_parm_attr = __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); @@ -553,7 +606,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, { char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; - reipl_get_ascii_vmparm(vmparm, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); return sprintf(page, "%s\n", vmparm); } @@ -626,6 +679,59 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr = /* FCP reipl device attributes */ +static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; + void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t padding; + size_t scpdata_len; + + if (off < 0) + return -EINVAL; + + if (off >= DIAG308_SCPDATA_SIZE) + return -ENOSPC; + + if (count > DIAG308_SCPDATA_SIZE - off) + count = DIAG308_SCPDATA_SIZE - off; + + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); + scpdata_len = off + count; + + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + 
memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+ return count;
+}
+
+static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
+ .attr = {
+ .name = "scp_data",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = PAGE_SIZE,
+ .read = reipl_fcp_scpdata_read,
+ .write = reipl_fcp_scpdata_write,
+};
+
 DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
 reipl_block_fcp->ipl_info.fcp.wwpn);
 DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
@@ -647,7 +753,6 @@ static struct attribute *reipl_fcp_attrs[] = {
 };
 
 static struct attribute_group reipl_fcp_attr_group = {
- .name = IPL_FCP_STR,
 .attrs = reipl_fcp_attrs,
 };
 
@@ -895,6 +1000,7 @@ static struct kobj_attribute reipl_type_attr =
 __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
 
 static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
 
 static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
 const enum ipl_method m)
@@ -906,7 +1012,7 @@ static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
 
 reipl_get_ascii_loadparm(loadparm, ipb);
 reipl_get_ascii_nss_name(nss_name, ipb);
- reipl_get_ascii_vmparm(vmparm, ipb);
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
 
 switch (m) {
 case REIPL_METHOD_CCW_VM:
@@ -1076,23 +1182,44 @@ static int __init reipl_fcp_init(void)
 int rc;
 
 if (!diag308_set_works) {
- if (ipl_info.type == IPL_TYPE_FCP)
+ if (ipl_info.type == IPL_TYPE_FCP) {
 make_attrs_ro(reipl_fcp_attrs);
- else
+ sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
+ } else
 return 0;
 }
 
 reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
 if (!reipl_block_fcp)
 return -ENOMEM;
- rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group);
+
+ /* sysfs: create fcp kset for mixing attr group and bin attrs */
+ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_fcp_kset) {
+ free_page((unsigned long) reipl_block_fcp);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
 if (rc) {
- free_page((unsigned long)reipl_block_fcp);
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
 return rc;
 }
- if (ipl_info.type == IPL_TYPE_FCP) {
+
+ rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
+ &sys_reipl_fcp_scp_data_attr);
+ if (rc) {
+ sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+ }
+
+ if (ipl_info.type == IPL_TYPE_FCP)
 memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
- } else {
+ else {
 reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
 reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
 reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
-- 
cgit v1.2.3


From 18d00acfe2f3fc5ee62f679eb2e397ae962fe69b Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 11 Sep 2009 10:28:41 +0200
Subject: [S390] kernel: Convert upper case scpdata to lower case

If the CP SET LOADDEV on the 3215 console has been used to specify SCPdata, all data is converted to upper case letters. When scpdata contains upper case letters only, convert all letters to lower case.
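Restated as a self-contained sketch (illustrative names; the kernel code is in the hunk below): fold to lower case only when the buffer contains no lower-case letter at all, so deliberately mixed-case data passes through untouched.

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	/* Sketch of the case-folding rule: CP SET LOADDEV upcases everything,
	 * so an all-upper-case buffer is assumed to have been upcased. */
	static void fold_if_all_upper(char *buf)
	{
		size_t i, len = strlen(buf);
		int has_lower = 0;

		for (i = 0; i < len; i++)
			if (islower((unsigned char)buf[i]))
				has_lower = 1;
		if (has_lower)
			return;			/* mixed case: keep verbatim */
		for (i = 0; i < len; i++)
			buf[i] = tolower((unsigned char)buf[i]);
	}

	int main(void)
	{
		char a[] = "DASD=0.0.0200";	/* upcased by the console */
		char b[] = "dasd=0.0.0200 LVM";	/* deliberate mixed case */

		fold_if_all_upper(a);
		fold_if_all_upper(b);
		printf("%s\n%s\n", a, b);	/* folded / unchanged */
		return 0;
	}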
Signed-off-by: Hendrik Brueckner
Signed-off-by: Martin Schwidefsky
---
 arch/s390/kernel/ipl.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 04451a5e3c1..ee57a42e6e9 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -340,19 +340,28 @@ size_t reipl_append_ascii_scpdata(char *dest, size_t size,
 {
 size_t count;
 size_t i;
+ int has_lowercase;
 
 count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
 ipb->ipl_info.fcp.scp_data_len));
 if (!count)
 goto out;
 
- for (i = 0; i < count; i++)
+ has_lowercase = 0;
+ for (i = 0; i < count; i++) {
 if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
 count = 0;
 goto out;
 }
+ if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ has_lowercase = 1;
+ }
 
- memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ if (has_lowercase)
+ memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ else
+ for (i = 0; i < count; i++)
+ dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
 out:
 dest[count] = '\0';
 return count;
-- 
cgit v1.2.3


From a8c3cb4955d1a051732ead9ecf8bcffc8e7c039d Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 11 Sep 2009 10:28:42 +0200
Subject: [S390] move (io|sysc)_restore_trace_psw into .data section

The sysc_restore_trace_psw and io_restore_trace_psw storage locations are created in the .text section. When creating and IPLing from a named saved system (NSS), writing to these locations causes a protection exception (because the .text section is mapped as shared read-only in the NSS). To permit write access, move the storage locations into the .data section.

The problem occurs only when CONFIG_TRACE_IRQFLAGS is set. The git commit that has introduced these variables is: 411788ea7fca01ee803af8225ac35807b4d02050

Signed-off-by: Hendrik Brueckner
Signed-off-by: Martin Schwidefsky
---
 arch/s390/kernel/entry.S | 16 ++++++++++++++--
 arch/s390/kernel/entry64.S | 4 ++++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index c4c80a22bc1..f78580a7403 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -278,7 +278,8 @@ sysc_return:
 bnz BASED(sysc_work) # there is work to do (signals etc.)
 sysc_restore:
 #ifdef CONFIG_TRACE_IRQFLAGS
- la %r1,BASED(sysc_restore_trace_psw)
+ la %r1,BASED(sysc_restore_trace_psw_addr)
+ l %r1,0(%r1)
 lpsw 0(%r1)
 sysc_restore_trace:
 TRACE_IRQS_CHECK
@@ -289,10 +290,15 @@ sysc_leave:
 sysc_done:
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+sysc_restore_trace_psw_addr:
+ .long sysc_restore_trace_psw
+
+ .section .data,"aw",@progbits
 .align 8
 .globl sysc_restore_trace_psw
sysc_restore_trace_psw:
 .long 0, sysc_restore_trace + 0x80000000
+ .previous
 #endif
 
 #
@@ -606,7 +612,8 @@ io_return:
 bnz BASED(io_work) # there is work to do (signals etc.)
io_restore: #ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(io_restore_trace_psw) + la %r1,BASED(io_restore_trace_psw_addr) + l %r1,0(%r1) lpsw 0(%r1) io_restore_trace: TRACE_IRQS_CHECK @@ -617,10 +624,15 @@ io_leave: io_done: #ifdef CONFIG_TRACE_IRQFLAGS +io_restore_trace_psw_addr: + .long io_restore_trace_psw + + .section .data,"aw",@progbits .align 8 .globl io_restore_trace_psw io_restore_trace_psw: .long 0, io_restore_trace + 0x80000000 + .previous #endif # diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f6618e9e15e..009ca6175db 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -284,10 +284,12 @@ sysc_leave: sysc_done: #ifdef CONFIG_TRACE_IRQFLAGS + .section .data,"aw",@progbits .align 8 .globl sysc_restore_trace_psw sysc_restore_trace_psw: .quad 0, sysc_restore_trace + .previous #endif # @@ -595,10 +597,12 @@ io_leave: io_done: #ifdef CONFIG_TRACE_IRQFLAGS + .section .data,"aw",@progbits .align 8 .globl io_restore_trace_psw io_restore_trace_psw: .quad 0, io_restore_trace + .previous #endif # -- cgit v1.2.3 From 26803144666bd2155a19392fa58a7d512d9c0962 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Fri, 11 Sep 2009 10:28:43 +0200 Subject: [S390] Use macros for .data.page_aligned. .data.page_aligned should not need a separate output section, so as part of this cleanup I moved into the .data output section in the linker scripts in order to eliminate unnecessary references to the section name. Remove the reference to .data.idt, since nothing is put into the .data.idt section on the s390 architecture. It looks like Cyrill Gorcunov posted a patch to remove the .data.idt code on s390 previously: CCing him and the people who acked that patch in case there's a reason it wasn't applied. Signed-off-by: Tim Abbott Cc: Martin Schwidefsky Acked-by: Cyrill Gorcunov Cc: Sam Ravnborg Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vmlinux.lds.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a53db23ee09..8e023c8c246 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -60,6 +60,7 @@ SECTIONS } :data .data : { /* Data */ + PAGE_ALIGNED_DATA(PAGE_SIZE) DATA_DATA CONSTRUCTORS } @@ -72,11 +73,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); __nosave_end = .; - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.idt) - } - . = ALIGN(0x100); .data.cacheline_aligned : { *(.data.cacheline_aligned) -- cgit v1.2.3 From 04a95f6df9d7753098729ef1464e38552627af9d Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Fri, 11 Sep 2009 10:28:44 +0200 Subject: [S390] clean up linker script using new linker script macros. Note that this patch moves .data.init_task inside _edata. In addition, the alignment of .init.ramfs changes: It is now PAGE_ALIGNED and __initramfs_end is arbitrarily aligned; Previously it was only aligned to a 0x100-byte boundary, and always ended on an even byte. This change results in fewer output sections and in some data being reordered, but should have no functional effect. 
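For orientation, a rough from-memory sketch of what the RW_DATA_SECTION macro used below bundles together; the authoritative definition lives in include/asm-generic/vmlinux.lds.h and may differ in ordering and detail:

	/* Approximation only -- consult include/asm-generic/vmlinux.lds.h. */
	#define RW_DATA_SECTION(cacheline, pagealigned, inittask)	\
		. = ALIGN(PAGE_SIZE);					\
		.data : {						\
			INIT_TASK_DATA(inittask)			\
			NOSAVE_DATA					\
			PAGE_ALIGNED_DATA(pagealigned)			\
			CACHELINE_ALIGNED_DATA(cacheline)		\
			READ_MOSTLY_DATA(cacheline)			\
			DATA_DATA					\
			CONSTRUCTORS					\
		}

Assuming that shape, it is also why .data.init_task now ends up inside _edata: INIT_TASK_DATA is emitted as part of the .data output section itself.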
Signed-off-by: Nelson Elhage
Signed-off-by: Tim Abbott
Cc: Martin Schwidefsky
Cc: Heiko Carstens
Cc: linux-s390@vger.kernel.org
Signed-off-by: Martin Schwidefsky
---
 arch/s390/kernel/vmlinux.lds.S | 83 ++++--------------------------------------
 1 file changed, 7 insertions(+), 76 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 8e023c8c246..7315f9e67e1 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,51 +52,18 @@ SECTIONS
 . = ALIGN(PAGE_SIZE);
 _eshared = .; /* End of shareable data */
 
- . = ALIGN(16); /* Exception table */
- __ex_table : {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- } :data
-
- .data : { /* Data */
- PAGE_ALIGNED_DATA(PAGE_SIZE)
- DATA_DATA
- CONSTRUCTORS
- }
-
- . = ALIGN(PAGE_SIZE);
- .data_nosave : {
- __nosave_begin = .;
- *(.data.nosave)
- }
- . = ALIGN(PAGE_SIZE);
- __nosave_end = .;
+ EXCEPTION_TABLE(16) :data
 
- . = ALIGN(0x100);
- .data.cacheline_aligned : {
- *(.data.cacheline_aligned)
- }
+ RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
 
- . = ALIGN(0x100);
- .data.read_mostly : {
- *(.data.read_mostly)
- }
 _edata = .; /* End of data section */
 
- . = ALIGN(THREAD_SIZE); /* init_task */
- .data.init_task : {
- *(.data.init_task)
- }
-
 /* will be freed after init */
 . = ALIGN(PAGE_SIZE); /* Init code and data */
 __init_begin = .;
- .init.text : {
- _sinittext = .;
- INIT_TEXT
- _einittext = .;
- }
+
+ INIT_TEXT_SECTION(PAGE_SIZE)
+
 /*
 * .exit.text is discarded at runtime, not link time,
 * to deal with references from __bug_table
@@ -107,49 +74,13 @@ SECTIONS
 
 /* early.c uses stsi, which requires page aligned data. */
 . = ALIGN(PAGE_SIZE);
- .init.data : {
- INIT_DATA
- }
- . = ALIGN(0x100);
- .init.setup : {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
- }
- .initcall.init : {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
- }
-
- .con_initcall.init : {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
- }
- SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(0x100);
- .init.ramfs : {
- __initramfs_start = .;
- *(.init.ramfs)
- . = ALIGN(2);
- __initramfs_end = .;
- }
-#endif
+ INIT_DATA_SECTION(0x100)
 
 PERCPU(PAGE_SIZE)
 . = ALIGN(PAGE_SIZE);
 __init_end = .; /* freed after init ends here */
 
- /* BSS */
- .bss : {
- __bss_start = .;
- *(.bss)
- . = ALIGN(2);
- __bss_stop = .;
- }
+ BSS_SECTION(0, 2, 0)
 
 _end = . ;
-- 
cgit v1.2.3


From d3135e0c40c9a13ad0c57783f2b9569c4c00bd26 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Fri, 11 Sep 2009 10:28:45 +0200
Subject: [S390] kernel: always keep machine flags in lowcore

Eliminate the local variable machine_flags and always change machine flags directly in the lowcore. This avoids confusion about when and why the two variables have to be synchronized.

Signed-off-by: Heiko Carstens
Signed-off-by: Martin Schwidefsky
---
 arch/s390/kernel/early.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 21f3799fe27..734deeaa736 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -35,8 +35,6 @@
 
 char kernel_nss_name[NSS_NAME_SIZE + 1];
 
-static unsigned long machine_flags;
-
 static void __init setup_boot_command_line(void);
 
 /*
@@ -206,12 +204,9 @@ static noinline __init void detect_machine_type(void)
 
 /* Running under KVM?
If not we assume z/VM */
 if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3))
- machine_flags |= MACHINE_FLAG_KVM;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
 else
- machine_flags |= MACHINE_FLAG_VM;
-
- /* Store machine flags for setting up lowcore early */
- S390_lowcore.machine_flags = machine_flags;
 }
 
 static __init void early_pgm_check_handler(void)
@@ -246,7 +241,7 @@ static noinline __init void setup_hpage(void)
 facilities = stfl();
 if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
 return;
- machine_flags |= MACHINE_FLAG_HPAGE;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
 __ctl_set_bit(0, 23);
 #endif
 }
@@ -264,7 +259,7 @@ static __init void detect_mvpg(void)
 EX_TABLE(0b,1b)
 : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
 if (!rc)
- machine_flags |= MACHINE_FLAG_MVPG;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
 #endif
 }
@@ -280,7 +275,7 @@ static __init void detect_ieee(void)
 EX_TABLE(0b,1b)
 : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
 if (!rc)
- machine_flags |= MACHINE_FLAG_IEEE;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
 #endif
 }
@@ -299,7 +294,7 @@ static __init void detect_csp(void)
 EX_TABLE(0b,1b)
 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
 if (!rc)
- machine_flags |= MACHINE_FLAG_CSP;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
 #endif
 }
@@ -316,7 +311,7 @@ static __init void detect_diag9c(void)
 EX_TABLE(0b,1b)
 : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
 if (!rc)
- machine_flags |= MACHINE_FLAG_DIAG9C;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
 }
 
 static __init void detect_diag44(void)
@@ -331,7 +326,7 @@ static __init void detect_diag44(void)
 EX_TABLE(0b,1b)
 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
 if (!rc)
- machine_flags |= MACHINE_FLAG_DIAG44;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
 #endif
 }
@@ -342,11 +337,11 @@ static __init void detect_machine_facilities(void)
 facilities = stfl();
 if (facilities & (1 << 28))
- machine_flags |= MACHINE_FLAG_IDTE;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
 if (facilities & (1 << 23))
- machine_flags |= MACHINE_FLAG_PFMF;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
 if (facilities & (1 << 4))
- machine_flags |= MACHINE_FLAG_MVCOS;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
 #endif
 }
@@ -428,7 +423,6 @@ void __init startup_init(void)
 setup_hpage();
 sclp_facilities_detect();
 detect_memory_layout(memory_chunk);
- S390_lowcore.machine_flags = machine_flags;
 #ifdef CONFIG_DYNAMIC_FTRACE
 S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
 #endif
-- 
cgit v1.2.3


From 275c340941991a925969c03ec6b900fd135d09dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 11 Sep 2009 10:28:46 +0200
Subject: [S390] remove unused irq_cpustat_t definition

No need to define an irq_cpustat_t type if __ARCH_IRQ_STAT is defined.
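The mechanism, paraphrased: once an architecture defines __ARCH_IRQ_STAT it must supply its own local_softirq_pending(), and the generic per-cpu array, and with it irq_cpustat_t, is never referenced. The s390 side below is taken from the diff; the generic branch is an approximation of linux/irq_cpustat.h from that era, quoted from memory:

	/* arch side (s390): the flag lives in the lowcore */
	#define __ARCH_IRQ_STAT
	#define local_softirq_pending()	(S390_lowcore.softirq_pending)

	/* generic side, approximately (linux/irq_cpustat.h): compiled only
	 * when the arch does NOT bring its own definition */
	#ifndef __ARCH_IRQ_STAT
	extern irq_cpustat_t irq_stat[];
	#define __IRQ_STAT(cpu, member)	(irq_stat[cpu].member)
	#define local_softirq_pending() \
		__IRQ_STAT(smp_processor_id(), __softirq_pending)
	#endif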
Signed-off-by: Christoph Hellwig Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hardirq.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 89ec7056da2..498bc389238 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,13 +18,6 @@ #include #include -/* irq_cpustat_t is unused currently, but could be converted - * into a percpu variable instead of storing softirq_pending - * on the lowcore */ -typedef struct { - unsigned int __softirq_pending; -} irq_cpustat_t; - #define local_softirq_pending() (S390_lowcore.softirq_pending) #define __ARCH_IRQ_STAT -- cgit v1.2.3 From 2395ecd98f028b16a6200eb81108a0f67461d16b Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Fri, 11 Sep 2009 10:28:48 +0200 Subject: [S390] hypfs: remove useless variable qname Local variable 'qname' in the function hypfs_create_file() really is not used for any purpose. Signed-off-by: Vitaliy Gusev Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/inode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5a805df216b..e760b5b0141 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -355,11 +355,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, { struct dentry *dentry; struct inode *inode; - struct qstr qname; - qname.name = name; - qname.len = strlen(name); - qname.hash = full_name_hash(name, qname.len); mutex_lock(&parent->d_inode->i_mutex); dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) { -- cgit v1.2.3 From 11af97e18ea8c310d73b59f1361e6f04444e05a8 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 11 Sep 2009 10:28:53 +0200 Subject: [S390] kernel: Print an error message if kernel NSS cannot be defined If a named saved system (NSS) cannot be defined or saved, print out an error message with the return code of the underlying z/VM CP command. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 734deeaa736..bf8b4ae7ff2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -6,6 +6,9 @@ * Heiko Carstens */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -16,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +144,8 @@ static noinline __init void create_kernel_nss(void) __cpcmd(defsys_cmd, NULL, 0, &response); if (response != 0) { + pr_err("Defining the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } @@ -152,8 +158,11 @@ static noinline __init void create_kernel_nss(void) * max SAVESYS_CMD_SIZE * On error: response contains the numeric portion of cp error message. 
* for SAVESYS it will be >= 263 + * for missing privilege class, it will be 1 */ - if (response > SAVESYS_CMD_SIZE) { + if (response > SAVESYS_CMD_SIZE || response == 1) { + pr_err("Saving the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } -- cgit v1.2.3 From ad2a5d8e0b518f997af126dd737127bdada90a6f Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Fri, 11 Sep 2009 10:28:54 +0200 Subject: [S390] hypfs: Use "%u" instead of "%d" for unsigned ints in snprintf For printing unsigned integers hypfs uses "%d" in snprintf(). This is wrong. With this patch "%u" is used instead. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index e760b5b0141..bd9914b8948 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -422,7 +422,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, char tmp[TMP_SIZE]; struct dentry *dentry; - snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value); + snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); buffer = kstrdup(tmp, GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From c4de0c1a18237c2727dde8ad392e333539b0af3c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 11 Sep 2009 10:28:56 +0200 Subject: [S390] kvm: use console_initcall() to initialize s390 virtio console Use a console_initcall() to initialize the s390 virtio console and clean up s390 console initialization in setup.c. Signed-off-by: Hendrik Brueckner Tested-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kvm_virtio.h | 10 ---------- arch/s390/kernel/setup.c | 10 +++------- 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index 0503936f101..acdfdff2661 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -54,14 +54,4 @@ struct kvm_vqconfig { * This is pagesize for historical reasons. 
*/ #define KVM_S390_VIRTIO_RING_ALIGN 4096 -#ifdef __KERNEL__ -/* early virtio console setup */ -#ifdef CONFIG_S390_GUEST -extern void s390_virtio_console_init(void); -#else -static inline void s390_virtio_console_init(void) -{ -} -#endif /* CONFIG_VIRTIO_CONSOLE */ -#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index cbb897bc50b..9ed13a1ed37 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -156,15 +156,11 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { + if (MACHINE_IS_KVM) add_preferred_console("hvc", 0, NULL); - s390_virtio_console_init(); - return; - } - - if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); - if (CONSOLE_IS_3270) + else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); } -- cgit v1.2.3 From 50aa98bad056a17655864a4d71ebc32d95c629a7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 11 Sep 2009 10:28:57 +0200 Subject: [S390] fix recursive locking on page_table_lock Suzuki Poulose reported the following recursive locking bug on s390: Here is the stack trace : (see Appendix I for more info) [<0000000000406ed6>] _spin_lock+0x52/0x94 [<0000000000103bde>] crst_table_free+0x14e/0x1a4 [<00000000001ba684>] __pmd_alloc+0x114/0x1ec [<00000000001be8d0>] handle_mm_fault+0x2cc/0xb80 [<0000000000407d62>] do_dat_exception+0x2b6/0x3a0 [<0000000000114f8c>] sysc_return+0x0/0x8 [<00000200001642b2>] 0x200001642b2 The page_table_lock is already acquired in __pmd_alloc (mm/memory.c) and it tries to populate the pud/pgd with a new pmd allocated. If another thread populates it before we get a chance, we free the pmd using pmd_free(). On s390x, pmd_free(even pud_free ) is #defined to crst_table_free(), which acquires the page_table_lock to protect the crst_table index updates. Hence this ends up in a recursive locking of the page_table_lock. The solution suggested by Dave Hansen is to use a new spin lock in the mmu context to protect the access to the crst_list and the pgtable_list. 
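The bug reduces to the classic pattern below, shown as a user-space toy (pthreads; the names are only borrowed from the kernel, this is not the kernel code): a helper takes a non-recursive lock that its caller already holds, and the fix gives the list manipulation its own dedicated lock.

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* fix */

	static void crst_table_free(void)
	{
		/* before the fix this took page_table_lock, which our
		 * caller already holds -> self-deadlock */
		pthread_mutex_lock(&list_lock);
		/* ... unlink the table from context.crst_list ... */
		pthread_mutex_unlock(&list_lock);
	}

	static void pmd_alloc_path(void)
	{
		pthread_mutex_lock(&page_table_lock);
		/* lost the populate race: free the pmd we just allocated */
		crst_table_free();
		pthread_mutex_unlock(&page_table_lock);
	}

	int main(void)
	{
		pmd_alloc_path();
		puts("returned: no self-deadlock with the dedicated list lock");
		return 0;
	}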
Reported-by: Suzuki Poulose Cc: Dave Hansen Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 1 + arch/s390/include/asm/pgalloc.h | 1 + arch/s390/mm/pgtable.c | 24 ++++++++++++------------ arch/s390/mm/vmem.c | 1 + 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 3b59216e628..03be99919d6 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -2,6 +2,7 @@ #define __MMU_H typedef struct { + spinlock_t list_lock; struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index b2658b9220f..ddad5903341 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); return (pgd_t *) crst_table_alloc(mm, s390_noexec); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 56566720798..c7021524707 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } @@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) unsigned long *shadow = get_shadow_table(table); struct page *page = virt_to_page(table); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_del(&page->lru); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); @@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return table; } @@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 
3UL : 1UL;
 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- spin_lock(&mm->page_table_lock);
+ spin_lock(&mm->context.list_lock);
 page->flags ^= bits;
 if (page->flags & FRAG_MASK) {
 /* Page now has some free pgtable fragments. */
@@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 } else
 /* All fragments of the 4K page have been freed. */
 list_del(&page->lru);
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(&mm->context.list_lock);
 if (page) {
 pgtable_page_dtor(page);
 __free_page(page);
@@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 {
 struct page *page;
 
- spin_lock(&mm->page_table_lock);
+ spin_lock(&mm->context.list_lock);
 /* Free shadow region and segment tables. */
 list_for_each_entry(page, &mm->context.crst_list, lru)
 if (page->index) {
@@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 /* "Free" second halves of page tables. */
 list_for_each_entry(page, &mm->context.pgtable_list, lru)
 page->flags &= ~SECOND_HALVES;
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(&mm->context.list_lock);
 mm->context.noexec = 0;
 update_mm(mm, tsk);
 }
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index e4868bfc672..5f91a38d759 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -331,6 +331,7 @@ void __init vmem_map_init(void)
 unsigned long start, end;
 int i;
 
+ spin_lock_init(&init_mm.context.list_lock);
 INIT_LIST_HEAD(&init_mm.context.crst_list);
 INIT_LIST_HEAD(&init_mm.context.pgtable_list);
 init_mm.context.noexec = 0;
-- 
cgit v1.2.3


From 0c88ee5b7523e76e290d558c28cd0be48ffad597 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Fri, 11 Sep 2009 10:28:58 +0200
Subject: [S390] Initialize __LC_THREAD_INFO early.

"lockdep: Fix backtraces" reveals a bug in early setup code: when lockdep tries to save a stack backtrace before setup_arch has been called, the lowcore pointer for the current thread info pointer isn't initialized yet. However our save stack backtrace code relies on it. If the pointer isn't initialized the saved backtrace will have zero entries. lockdep however relies (correctly) on the fact that that cannot happen. A write access to some random memory region is the result. Fix this by initializing the thread info pointer early.
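A toy model of the failure mode (plain C, illustrative only): a stack walker that bounds its loop by the thread_info-derived stack range records zero entries when that base was never initialized.

	#include <stdio.h>

	#define THREAD_SIZE 8192

	static int walk(unsigned long sp, unsigned long stack_base,
			unsigned long *trace, int max)
	{
		int n = 0;

		/* frames are accepted only inside the presumed stack range */
		while (n < max && sp >= stack_base &&
		       sp < stack_base + THREAD_SIZE) {
			trace[n++] = sp;	/* record a "frame" */
			sp += 64;		/* pretend backchain step */
		}
		return n;
	}

	int main(void)
	{
		unsigned long trace[8];

		/* stack_base == 0: the bug -> zero entries */
		printf("entries: %d\n", walk(0x5000, 0, trace, 8));
		/* initialized base -> frames are recorded */
		printf("entries: %d\n", walk(0x5000, 0x4000, trace, 8));
		return 0;
	}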
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 1 + arch/s390/kernel/head64.S | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 2ced846065b..602b508cd4c 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -24,6 +24,7 @@ startup_continue: # Setup stack # l %r15,.Linittu-.LPG1(%r13) + st %r15,__LC_THREAD_INFO # cache thread info in lowcore mvc __LC_CURRENT(4),__TI_task(%r15) ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE st %r15,__LC_KERNEL_STACK # set end of kernel stack diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 65667b2e65c..bdcb3f05bcd 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -92,6 +92,7 @@ startup_continue: # Setup stack # larl %r15,init_thread_union + stg %r15,__LC_THREAD_INFO # cache thread info in lowcore lg %r14,__TI_task(%r15) # cache current in lowcore stg %r14,__LC_CURRENT aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE -- cgit v1.2.3 From f64d04c042193183c6dad98944ae2861b998e8b7 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Fri, 11 Sep 2009 10:28:59 +0200 Subject: [S390] s390dbf: Add description for usage of "%s" in sprintf events Using "%s" in sprintf event functions is dangerous. This patch adds a short description for this issue to the s390 debug feature documentation. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/debug.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 31ed5686a96..18124b75a7a 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -167,6 +167,10 @@ debug_text_event(debug_info_t* id, int level, const char* txt) return debug_event_common(id,level,txt,strlen(txt)); } +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ extern debug_entry_t * debug_sprintf_event(debug_info_t* id,int level,char *string,...) __attribute__ ((format(printf, 3, 4))); @@ -206,7 +210,10 @@ debug_text_exception(debug_info_t* id, int level, const char* txt) return debug_exception_common(id,level,txt,strlen(txt)); } - +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ extern debug_entry_t * debug_sprintf_exception(debug_info_t* id,int level,char *string,...) 
__attribute__ ((format(printf, 3, 4))); -- cgit v1.2.3 From 5dd1d2ece0250125fec2dcccbfb8ab9bb2ac020c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:00 +0200 Subject: [S390] use generic scatterlist.h Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/scatterlist.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h index 29ec8e28c8d..35d786fe93a 100644 --- a/arch/s390/include/asm/scatterlist.h +++ b/arch/s390/include/asm/scatterlist.h @@ -1,19 +1 @@ -#ifndef _ASMS390_SCATTERLIST_H -#define _ASMS390_SCATTERLIST_H - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; -}; - -#ifdef __s390x__ -#define ISA_DMA_THRESHOLD (0xffffffffffffffffUL) -#else -#define ISA_DMA_THRESHOLD (0xffffffffUL) -#endif - -#endif /* _ASMS390X_SCATTERLIST_H */ +#include -- cgit v1.2.3 From 2ddddf3e0a55a7fcd6f240a7416cfcb12dd38b7e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:01 +0200 Subject: [S390] Enable guest page hinting by default. Get rid of the PAGE_STATES config option and enable guest page hinting by default. It can be disabled by specifying "cmma=off" at the command line. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 7 ------- arch/s390/include/asm/page.h | 4 ---- arch/s390/include/asm/system.h | 4 ---- arch/s390/mm/Makefile | 4 ++-- arch/s390/mm/page-states.c | 6 ++---- 5 files changed, 4 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 47836b945d0..e030e86ff6a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -480,13 +480,6 @@ config CMM_IUCV Select this option to enable the special message interface to the cooperative memory management. -config PAGE_STATES - bool "Unused page notification" - help - This enables the notification of unused pages to the - hypervisor. The ESSA instruction is used to do the states - changes between a page that has content and the unused state. 
- config APPLDATA_BASE bool "Linux - VM Monitor Stream, base infrastructure" depends on PROC_FS diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3e3594d01f8..5e9daf5d7f2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -125,8 +125,6 @@ page_get_storage_key(unsigned long addr) return skey; } -#ifdef CONFIG_PAGE_STATES - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); @@ -134,8 +132,6 @@ void arch_alloc_page(struct page *page, int order); #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#endif - #endif /* !__ASSEMBLY__ */ #define __PAGE_OFFSET 0x0UL diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 4fb83c1cdb7..379661d2f81 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -109,11 +109,7 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ -#ifdef CONFIG_PAGE_STATES extern void cmma_init(void); -#else -static inline void cmma_init(void) { } -#endif #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index db05661ac89..eec05448441 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ + page-states.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index fc0ad73ffd9..f92ec203ad9 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -1,6 +1,4 @@ /* - * arch/s390/mm/page-states.c - * * Copyright IBM Corp. 2008 * * Guest page hinting for unused pages. @@ -17,11 +15,12 @@ #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 -static int cmma_flag; +static int cmma_flag = 1; static int __init cmma(char *str) { char *parm; + parm = strstrip(str); if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { cmma_flag = 1; @@ -32,7 +31,6 @@ static int __init cmma(char *str) return 1; return 0; } - __setup("cmma=", cmma); void __init cmma_init(void) -- cgit v1.2.3 From 4bb5e07b68565d7983108993aa23eccf5f1b35fe Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:03 +0200 Subject: [S390] Limit cpu detection to 256 physical cpus. Saves us more than 65k pointless IPIs. 
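The arithmetic behind that number, spelled out (trivial C; the old and new loop bounds are taken from the hunks below):

	#include <stdio.h>

	#define MAX_CPU_ADDRESS 255

	int main(void)
	{
		int old_probes = 65535 + 1;		/* cpu_id 0..65535 */
		int new_probes = MAX_CPU_ADDRESS + 1;	/* cpu_id 0..255 */

		/* 65280 sigp sense probes saved per rescan */
		printf("probes saved: %d\n", old_probes - new_probes);
		return 0;
	}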
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu.h | 6 ++++++ arch/s390/kernel/head.S | 1 + arch/s390/kernel/head64.S | 8 +++----- arch/s390/kernel/smp.c | 5 +++-- 4 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 arch/s390/include/asm/cpu.h (limited to 'arch') diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h new file mode 100644 index 00000000000..702ad46841c --- /dev/null +++ b/arch/s390/include/asm/cpu.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_CPU_H +#define _ASM_S390_CPU_H + +#define MAX_CPU_ADDRESS 255 + +#endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index ec688234852..c52b4f7742f 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_64BIT #define ARCH_OFFSET 4 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index bdcb3f05bcd..6a250808092 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -62,9 +62,9 @@ startup_continue: clr %r11,%r12 je 5f # no more space in prefix array 4: - ahi %r8,1 # next cpu (r8 += 1) - cl %r8,.Llast_cpu-.LPG1(%r13) # is last possible cpu ? - jl 1b # jump if not last cpu + ahi %r8,1 # next cpu (r8 += 1) + chi %r8,MAX_CPU_ADDRESS # is last possible cpu ? + jle 1b # jump if not last cpu 5: lhi %r1,2 # mode 2 = esame (dump) j 6f @@ -130,8 +130,6 @@ startup_continue: #ifdef CONFIG_ZFCPDUMP .Lcurrent_cpu: .long 0x0 -.Llast_cpu: - .long 0x0000ffff .Lpref_arr_ptr: .long zfcpdump_prefix_array #endif /* CONFIG_ZFCPDUMP */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index be2cae08340..9261495ca2c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "entry.h" static struct task_struct *current_set[NR_CPUS]; @@ -300,7 +301,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) logical_cpu = cpumask_first(&avail); if (logical_cpu >= nr_cpu_ids) return 0; - for (cpu_id = 0; cpu_id <= 65535; cpu_id++) { + for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; @@ -379,7 +380,7 @@ static void __init smp_detect_cpus(void) /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= 65535; cpu++) { + for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { if (cpu == boot_cpu_addr) continue; __cpu_logical_map[CPU_INIT_NO] = cpu; -- cgit v1.2.3 From e86a6ed63f46fe8fb555fda531084bca3ef62fd7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:04 +0200 Subject: [S390] Get rid of cpuid.h header file. Merge cpuid.h header file into cpu.h. While at it convert from typedef to struct declaration and also convert cio code to use proper lowcore structure instead of casts. 
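A quick host-side sanity check on the new struct (plain C; note that bit-field order within the words matches the hardware only on a big-endian target such as s390): the four bit-fields add up to exactly the 64 bits that stidp stores.

	#include <stdio.h>

	struct cpuid {
		unsigned int version : 8;
		unsigned int ident : 24;
		unsigned int machine : 16;
		unsigned int unused : 16;
	} __attribute__((packed));

	int main(void)
	{
		/* 8 + 24 + 16 + 16 = 64 bits */
		printf("sizeof(struct cpuid) = %zu, expected 8\n",
		       sizeof(struct cpuid));
		return 0;
	}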
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu.h | 20 ++++++++++++++++++++ arch/s390/include/asm/cpuid.h | 25 ------------------------- arch/s390/include/asm/kvm_host.h | 6 +++--- arch/s390/include/asm/lowcore.h | 6 +++--- arch/s390/include/asm/processor.h | 4 ++-- 5 files changed, 28 insertions(+), 33 deletions(-) delete mode 100644 arch/s390/include/asm/cpuid.h (limited to 'arch') diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 702ad46841c..471234b9057 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -1,6 +1,26 @@ +/* + * Copyright IBM Corp. 2000,2009 + * Author(s): Hartmut Penner , + * Martin Schwidefsky , + * Christian Ehrhardt , + */ + #ifndef _ASM_S390_CPU_H #define _ASM_S390_CPU_H #define MAX_CPU_ADDRESS 255 +#ifndef __ASSEMBLY__ + +#include + +struct cpuid +{ + unsigned int version : 8; + unsigned int ident : 24; + unsigned int machine : 16; + unsigned int unused : 16; +} __packed; + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/cpuid.h b/arch/s390/include/asm/cpuid.h deleted file mode 100644 index 07836a2e522..00000000000 --- a/arch/s390/include/asm/cpuid.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright IBM Corp. 2000,2009 - * Author(s): Hartmut Penner , - * Martin Schwidefsky - * Christian Ehrhardt - */ - -#ifndef _ASM_S390_CPUID_H_ -#define _ASM_S390_CPUID_H_ - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -typedef struct -{ - unsigned int version : 8; - unsigned int ident : 24; - unsigned int machine : 16; - unsigned int unused : 16; -} __attribute__ ((packed)) cpuid_t; - -#endif /* _ASM_S390_CPUID_H_ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1cd02f6073a..698988f6940 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -217,8 +217,8 @@ struct kvm_vcpu_arch { struct hrtimer ckc_timer; struct tasklet_struct tasklet; union { - cpuid_t cpu_id; - u64 stidp_data; + struct cpuid cpu_id; + u64 stidp_data; }; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5046ad6b7a6..6bc9426a6fb 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -132,7 +132,7 @@ #ifndef __ASSEMBLY__ -#include +#include #include #include @@ -275,7 +275,7 @@ struct _lowcore __u32 user_exec_asce; /* 0x02ac */ /* SMP info area */ - cpuid_t cpu_id; /* 0x02b0 */ + struct cpuid cpu_id; /* 0x02b0 */ __u32 cpu_nr; /* 0x02b8 */ __u32 softirq_pending; /* 0x02bc */ __u32 percpu_offset; /* 0x02c0 */ @@ -380,7 +380,7 @@ struct _lowcore __u64 user_exec_asce; /* 0x0318 */ /* SMP info area */ - cpuid_t cpu_id; /* 0x0320 */ + struct cpuid cpu_id; /* 0x0320 */ __u32 cpu_nr; /* 0x0328 */ __u32 softirq_pending; /* 0x032c */ __u64 percpu_offset; /* 0x0330 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c139fa7b8e8..cf8eed3fa77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,7 +14,7 @@ #define __ASM_S390_PROCESSOR_H #include -#include +#include #include #include #include @@ -26,7 +26,7 @@ */ #define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) -static inline void 
get_cpu_id(cpuid_t *ptr) +static inline void get_cpu_id(struct cpuid *ptr) { asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr)); } -- cgit v1.2.3 From 5c0b912e755caaad555eb6feefdb1124462d8f37 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:05 +0200 Subject: [S390] Remove smp_cpu_not_running. smp_cpu_not_running() and cpu_stopped() are doing the same. Remove one and also get rid of the last hard_smp_processor_id() leftover. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/smp.h | 32 +------------------------------- arch/s390/kernel/smp.c | 34 +++++++++++++++++++--------------- 2 files changed, 20 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 72137bc907a..c991fe6473c 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -51,32 +51,7 @@ extern void machine_power_off_smp(void); #define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ #define raw_smp_processor_id() (S390_lowcore.cpu_nr) - -/* - * returns 1 if cpu is in stopped/check stopped state or not operational - * returns 0 otherwise - */ -static inline int -smp_cpu_not_running(int cpu) -{ - __u32 status; - - switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { - case sigp_order_code_accepted: - case sigp_status_stored: - /* Check for stopped and check stop state */ - if (status & 0x50) - return 1; - break; - case sigp_not_operational: - return 1; - default: - break; - } - return 0; -} - -#define cpu_logical_map(cpu) (cpu) +#define cpu_logical_map(cpu) (cpu) extern int __cpu_disable (void); extern void __cpu_die (unsigned int cpu); @@ -91,11 +66,6 @@ extern void arch_send_call_function_ipi(cpumask_t mask); #endif -#ifndef CONFIG_SMP -#define hard_smp_processor_id() 0 -#define smp_cpu_not_running(cpu) 1 -#endif - #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); #else diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9261495ca2c..56c16876b91 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -71,6 +71,23 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static void smp_ext_bitcall(int, ec_bit_sig); +static int cpu_stopped(int cpu) +{ + __u32 status; + + switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { + case sigp_order_code_accepted: + case sigp_status_stored: + /* Check for stopped and check stop state */ + if (status & 0x50) + return 1; + break; + default: + break; + } + return 0; +} + void smp_send_stop(void) { int cpu, rc; @@ -87,7 +104,7 @@ void smp_send_stop(void) rc = signal_processor(cpu, sigp_stop); } while (rc == sigp_busy); - while (!smp_cpu_not_running(cpu)) + while (!cpu_stopped(cpu)) cpu_relax(); } } @@ -270,19 +287,6 @@ static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } #endif /* CONFIG_ZFCPDUMP */ -static int cpu_stopped(int cpu) -{ - __u32 status; - - /* Check for stopped state */ - if (signal_processor_ps(&status, 0, cpu, sigp_sense) == - sigp_status_stored) { - if (status & 0x40) - return 1; - } - return 0; -} - static int cpu_known(int cpu_id) { int cpu; @@ -636,7 +640,7 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { /* Wait until target cpu is down */ - while (!smp_cpu_not_running(cpu)) + while (!cpu_stopped(cpu)) cpu_relax(); smp_free_lowcore(cpu); pr_info("Processor %d stopped\n", cpu); -- cgit v1.2.3 From bde69af2ab696eebfac9583ea1e8a46b571e317f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 11 Sep 2009 10:29:06 +0200 Subject: 
[S390] Wire up page fault events for software perf counters. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e5e119fe03b..1abbadd497e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,6 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include #include #include #include @@ -305,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) * interrupts again and then search the VMAs */ local_irq_enable(); - + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); si_code = SEGV_MAPERR; @@ -363,11 +364,15 @@ good_area: } BUG(); } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { tsk->min_flt++; - + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } up_read(&mm->mmap_sem); /* * The instruction that caused the program check will -- cgit v1.2.3
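With the events wired up, the counts become visible to user space. A minimal reader, written against today's interface names (at the time of this commit the syscall and constants still used the perf_counter naming, so treat this as a present-day sketch rather than part of the patch):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count = 0;
		char *p;
		int fd, i;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_SOFTWARE;
		attr.size = sizeof(attr);
		attr.config = PERF_COUNT_SW_PAGE_FAULTS;

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		/* fault in a few pages as a workload */
		p = malloc(64 * 4096);
		for (i = 0; i < 64; i++)
			p[i * 4096] = 1;
		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("page faults: %lld\n", count);
		close(fd);
		free(p);
		return 0;
	}

The same counters are what "perf stat -e page-faults,minor-faults,major-faults" reports per workload.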