From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 Feb 2008 08:48:23 +0100
Subject: x86: fix stackprotector canary updates during context switches

fix a bug noticed and fixed by pageexec@freemail.hu.

if built with -fstack-protector-all then we'll have canary checks built
into the __switch_to() function. That does not work well with the
canary-switching code there: while we already use the %rsp of the
new task, we still call __switch_to() whith the previous task's canary
value in the PDA, hence the __switch_to() ssp prologue instructions
will store the previous canary. Then we update the PDA and upon return
from __switch_to() the canary check triggers and we panic.

so update the canary after we have called __switch_to(), where we are
at the same stackframe level as the last stackframe of the next
(and now freshly current) task.

Note: this means that we call __switch_to() [and its sub-functions]
still with the old canary, but that is not a problem, both the previous
and the next task has a high-quality canary. The only (mostly academic)
disadvantage is that the canary of one task may leak onto the stack of
another task, increasing the risk of information leaks, were an attacker
able to read the stack of specific tasks (but not that of others).

To solve this we'll have to reorganize the way we switch tasks, and move
the PDA setting into the switch_to() assembly code. That will happen in
another patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pda.h    | 2 --
 include/asm-x86/system.h | 6 +++++-
 include/linux/sched.h    | 3 +--
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 101fb9e1195..62b734986a4 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -16,11 +16,9 @@ struct x8664_pda {
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
 	unsigned int cpunumber;		/* 36 Logical CPU number */
-#ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
 					/* gcc-ABI: this canary MUST be at
 					   offset 40!!! */
-#endif
 	char *irqstackptr;
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index a2f04cd79b2..172f5418509 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -92,6 +92,8 @@ do {									\
 	     ".globl thread_return\n"					  \
 	     "thread_return:\n\t"					  \
 	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
+	     "movq %P[task_canary](%%rsi),%%r8\n\t"			  \
+	     "movq %%r8,%%gs:%P[pda_canary]\n\t"			  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
 	     "movq %%rax,%%rdi\n\t" 					  \
@@ -103,7 +105,9 @@ do {									\
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [tif_fork] "i" (TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
+	       [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
+	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \
+	       [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5395a6176f4..d6a51515878 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,10 +1096,9 @@ struct task_struct {
 	pid_t pid;
 	pid_t tgid;
 
-#ifdef CONFIG_CC_STACKPROTECTOR
 	/* Canary value for the -fstack-protector gcc feature */
 	unsigned long stack_canary;
-#endif
+
 	/* 
 	 * pointers to (original) parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with 
-- 
cgit v1.2.3


From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 Feb 2008 09:41:09 +0100
Subject: stackprotector: include files

create <linux/stackprotector.h> for core kernel files to include.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/stackprotector.h | 4 ++++
 include/linux/stackprotector.h   | 8 ++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 include/asm-x86/stackprotector.h
 create mode 100644 include/linux/stackprotector.h

(limited to 'include')

diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h
new file mode 100644
index 00000000000..dcac7a6bdba
--- /dev/null
+++ b/include/asm-x86/stackprotector.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H 1
+
+#endif
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
new file mode 100644
index 00000000000..d3e8bbe602f
--- /dev/null
+++ b/include/linux/stackprotector.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_STACKPROTECTOR_H
+#define _LINUX_STACKPROTECTOR_H 1
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+# include <asm/stackprotector.h>
+#endif
+
+#endif
-- 
cgit v1.2.3


From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 Feb 2008 09:42:02 +0100
Subject: stackprotector: add boot_init_stack_canary()

add the boot_init_stack_canary() and make the secondary idle threads
use it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/stackprotector.h | 20 ++++++++++++++++++++
 include/linux/stackprotector.h   |  4 ++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h
index dcac7a6bdba..0f91f7a2688 100644
--- a/include/asm-x86/stackprotector.h
+++ b/include/asm-x86/stackprotector.h
@@ -1,4 +1,24 @@
 #ifndef _ASM_STACKPROTECTOR_H
 #define _ASM_STACKPROTECTOR_H 1
 
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+	/*
+	 * If we're the non-boot CPU, nothing set the PDA stack
+	 * canary up for us - and if we are the boot CPU we have
+	 * a 0 stack canary. This is a good place for updating
+	 * it, as we wont ever return from this function (so the
+	 * invalid canaries already on the stack wont ever
+	 * trigger):
+	 */
+	current->stack_canary = get_random_int();
+	write_pda(stack_canary, current->stack_canary);
+}
+
 #endif
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
index d3e8bbe602f..422e71aafd0 100644
--- a/include/linux/stackprotector.h
+++ b/include/linux/stackprotector.h
@@ -3,6 +3,10 @@
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 # include <asm/stackprotector.h>
+#else
+static inline void boot_init_stack_canary(void)
+{
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 Feb 2008 09:44:08 +0100
Subject: x86: fix the stackprotector canary of the boot CPU

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/stackprotector.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
index 422e71aafd0..6f3e54c704c 100644
--- a/include/linux/stackprotector.h
+++ b/include/linux/stackprotector.h
@@ -1,6 +1,10 @@
 #ifndef _LINUX_STACKPROTECTOR_H
 #define _LINUX_STACKPROTECTOR_H 1
 
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 # include <asm/stackprotector.h>
 #else
-- 
cgit v1.2.3


From 960a672bd9f1ec06e8f197cf81a50fd07ea02e7f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 Feb 2008 09:56:04 +0100
Subject: x86: stackprotector: mix TSC to the boot canary

mix the TSC to the boot canary.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/stackprotector.h | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h
index 0f91f7a2688..3baf7ad89be 100644
--- a/include/asm-x86/stackprotector.h
+++ b/include/asm-x86/stackprotector.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_STACKPROTECTOR_H
 #define _ASM_STACKPROTECTOR_H 1
 
+#include <asm/tsc.h>
+
 /*
  * Initialize the stackprotector canary value.
  *
@@ -9,16 +11,28 @@
  */
 static __always_inline void boot_init_stack_canary(void)
 {
+	u64 canary;
+	u64 tsc;
+
 	/*
 	 * If we're the non-boot CPU, nothing set the PDA stack
 	 * canary up for us - and if we are the boot CPU we have
 	 * a 0 stack canary. This is a good place for updating
 	 * it, as we wont ever return from this function (so the
 	 * invalid canaries already on the stack wont ever
-	 * trigger):
+	 * trigger).
+	 *
+	 * We both use the random pool and the current TSC as a source
+	 * of randomness. The TSC only matters for very early init,
+	 * there it already has some randomness on most systems. Later
+	 * on during the bootup the random pool has true entropy too.
 	 */
-	current->stack_canary = get_random_int();
-	write_pda(stack_canary, current->stack_canary);
+	get_random_bytes(&canary, sizeof(canary));
+	tsc = __native_read_tsc();
+	canary += tsc + (tsc << 32UL);
+
+	current->stack_canary = canary;
+	write_pda(stack_canary, canary);
 }
 
 #endif
-- 
cgit v1.2.3


From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Tue, 22 Apr 2008 16:38:23 -0500
Subject: stackprotector: use canary at end of stack to indicate overruns at
 oops time

(Updated with a common max-stack-used checker that knows about
the canary, as suggested by Joe Perches)

Use a canary at the end of the stack to clearly indicate
at oops time whether the stack has ever overflowed.

This is a very simple implementation with a couple of
drawbacks:

1) a thread may legitimately use exactly up to the last
   word on the stack

 -- but the chances of doing this and then oopsing later seem slim

2) it's possible that the stack usage isn't dense enough
   that the canary location could get skipped over

 -- but the worst that happens is that we don't flag the overrun
 -- though this happens fairly often in my testing :(

With the code in place, an intentionally-bloated stack oops might
do:

BUG: unable to handle kernel paging request at ffff8103f84cc680
IP: [<ffffffff810253df>] update_curr+0x9a/0xa8
PGD 8063 PUD 0
Thread overran stack or stack corrupted
Oops: 0000 [1] SMP
CPU 0
...

... unless the stack overrun is so bad that it corrupts some other
thread.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/magic.h |  1 +
 include/linux/sched.h | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/magic.h b/include/linux/magic.h
index 1fa0c2ce4de..74e68e20116 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -42,4 +42,5 @@
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
 #define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
 
+#define STACK_END_MAGIC		0x57AC6E9D
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d6a51515878..c5181e77f30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 extern void thread_info_cache_init(void);
 
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+	unsigned long *n = end_of_stack(p);
+
+	do { 	/* Skip over canary */
+		n++;
+	} while (!*n);
+
+	return (unsigned long)n - (unsigned long)end_of_stack(p);
+}
+#endif
+
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
-- 
cgit v1.2.3


From af9ff7868f0f76d3364351b1641b9dfa99588e77 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 12 Jul 2008 09:36:38 -0700
Subject: x86: simplify stackprotector self-check

Clean up the code by removing no longer needed code;
make sure the pda is updated and kept in sync

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pda.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 62b734986a4..a5ff5bb7629 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -131,4 +131,5 @@ do {									\
 
 #define PDA_STACKOFFSET (5*8)
 
+#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
 #endif
-- 
cgit v1.2.3


From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sat, 10 Jan 2009 21:58:08 -0800
Subject: cpumask: update irq_desc to use cpumask_var_t

Impact: reduce memory usage, use new cpumask API.

Replace the affinity and pending_masks with cpumask_var_t's.  This adds
to the significant size reduction done with the SPARSE_IRQS changes.

The added functions (init_alloc_desc_masks & init_copy_desc_masks) are
in the include file so they can be inlined (and optimized out for the
!CONFIG_CPUMASKS_OFFSTACK case.)  [Naming chosen to be consistent with
the other init*irq functions, as well as the backwards arg declaration
of "from, to" instead of the more common "to, from" standard.]

Includes a slight change to the declaration of struct irq_desc to embed
the pending_mask within ifdef(CONFIG_SMP) to be consistent with other
references, and some small changes to Xen.

Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: virtualization@lists.osdl.org
Cc: xen-devel@lists.xensource.com
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
---
 include/linux/irq.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index f899b502f18..fa27210f1df 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -182,11 +182,11 @@ struct irq_desc {
 	unsigned int		irqs_unhandled;
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
-	cpumask_t		affinity;
+	cpumask_var_t		affinity;
 	unsigned int		cpu;
-#endif
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_t		pending_mask;
+	cpumask_var_t		pending_mask;
+#endif
 #endif
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*dir;
@@ -422,4 +422,79 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
 #endif /* !CONFIG_S390 */
 
+#ifdef CONFIG_SMP
+/**
+ * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * @desc:	pointer to irq_desc struct
+ * @boot:	true if need bootmem
+ *
+ * Allocates affinity and pending_mask cpumask if required.
+ * Returns true if successful (or not required).
+ * Side effect: affinity has all bits set, pending_mask has all bits clear.
+ */
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node,
+								bool boot)
+{
+	if (boot) {
+		alloc_bootmem_cpumask_var(&desc->affinity);
+		cpumask_setall(desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+		alloc_bootmem_cpumask_var(&desc->pending_mask);
+		cpumask_clear(desc->pending_mask);
+#endif
+		return true;
+	}
+
+	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
+		return false;
+	cpumask_setall(desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
+		free_cpumask_var(desc->affinity);
+		return false;
+	}
+	cpumask_clear(desc->pending_mask);
+#endif
+	return true;
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc:	pointer to old irq_desc struct
+ * @new_desc:	pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+					struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASKS_OFFSTACK
+	cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
+#else /* !CONFIG_SMP */
+
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node,
+								bool boot)
+{
+	return true;
+}
+
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+					struct irq_desc *new_desc)
+{
+}
+
+#endif	/* CONFIG_SMP */
+
 #endif /* _LINUX_IRQ_H */
-- 
cgit v1.2.3


From fbd59a8d1f7cf325fdb6828659f1fb76631e87b3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 10 Jan 2009 21:58:08 -0800
Subject: cpumask: Use topology_core_cpumask()/topology_thread_cpumask()

Impact: reduce stack usage, use new cpumask API.

This actually uses topology_core_cpumask() and
topology_thread_cpumask(), removing the only users of
topology_core_siblings() and topology_thread_siblings()

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: linux-net-drivers@solarflare.com
---
 include/linux/topology.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index e632d29f054..a16b9e06f2e 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -193,5 +193,11 @@ int arch_update_cpu_topology(void);
 #ifndef topology_core_siblings
 #define topology_core_siblings(cpu)		cpumask_of_cpu(cpu)
 #endif
+#ifndef topology_thread_cpumask
+#define topology_thread_cpumask(cpu)		cpumask_of(cpu)
+#endif
+#ifndef topology_core_cpumask
+#define topology_core_cpumask(cpu)		cpumask_of(cpu)
+#endif
 
 #endif /* _LINUX_TOPOLOGY_H */
-- 
cgit v1.2.3


From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sat, 10 Jan 2009 21:58:09 -0800
Subject: cpumask: fix bug in use cpumask_var_t in irq_desc

Impact: fix bug where new irq_desc uses old cpumask pointers which are freed.

As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to
the new desc overwriting the cpumask pointers.  Since the old_desc and
the cpumask pointers are freed, then memory corruption will occur if
these old pointers are used.

Move the allocation of these pointers to after the copy.

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/irq.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index fa27210f1df..27a67536511 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 /**
  * init_alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:	pointer to irq_desc struct
+ * @cpu:	cpu which will be handling the cpumasks
  * @boot:	true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
  * Side effect: affinity has all bits set, pending_mask has all bits clear.
  */
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node,
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
+	int node;
+
 	if (boot) {
 		alloc_bootmem_cpumask_var(&desc->affinity);
 		cpumask_setall(desc->affinity);
@@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node,
 		return true;
 	}
 
+	node = cpu_to_node(cpu);
+
 	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
 		return false;
 	cpumask_setall(desc->affinity);
@@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node,
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
 	return true;
-- 
cgit v1.2.3


From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sat, 10 Jan 2009 22:24:07 -0800
Subject: irq: initialize nr_irqs based on nr_cpu_ids

Impact: Reduce memory usage.

This is the second half of the changes to make the irq_desc_ptrs be
variable sized based on nr_cpu_ids.  This is done by adding a new
"max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to
return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids.

This necessitated moving the define of MAX_IO_APICS to a separate
file (asm/apicnum.h) so it could be included without the baggage
of the other asm/apicdef.h declarations.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/irqnr.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 86af92e9e84..de66e4e1040 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -20,11 +20,18 @@
 
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
+
 #else /* CONFIG_GENERIC_HARDIRQS */
 
+#include <asm/irq_vectors.h>	/* need possible max_nr_irqs() */
+
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 
+# ifndef max_nr_irqs
+#  define max_nr_irqs(nr_cpus)	NR_IRQS
+# endif
+
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
 	     irq++, desc = irq_to_desc(irq))				\
-- 
cgit v1.2.3


From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 11 Jan 2009 09:22:58 -0800
Subject: cpumask, irq: non-x86 build failures

Ingo Molnar wrote:

> All non-x86 architectures fail to build:
>
> In file included from /home/mingo/tip/include/linux/random.h:11,
>                  from /home/mingo/tip/include/linux/stackprotector.h:6,
>                  from /home/mingo/tip/init/main.c:17:
> /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory

Do not include asm/irq_vectors.h in generic code - it's not available
on all architectures.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqnr.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index de66e4e1040..887477bc2ab 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -23,15 +23,9 @@
 
 #else /* CONFIG_GENERIC_HARDIRQS */
 
-#include <asm/irq_vectors.h>	/* need possible max_nr_irqs() */
-
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 
-# ifndef max_nr_irqs
-#  define max_nr_irqs(nr_cpus)	NR_IRQS
-# endif
-
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
 	     irq++, desc = irq_to_desc(irq))				\
-- 
cgit v1.2.3


From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 12 Jan 2009 17:39:24 -0800
Subject: x86: arch_probe_nr_irqs

Impact: save RAM with large NR_CPUS, get smaller nr_irqs

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/interrupt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9127f6b51a3..472f11765f6 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v);
 struct irq_desc;
 
 extern int early_irq_init(void);
+extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
 extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 
-- 
cgit v1.2.3


From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: make percpu symbols zerobased on SMP

[ Based on original patch from Christoph Lameter and Mike Travis. ]

This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S.  A new
PHDR is added as existing ones cannot contain sections near address
zero.  PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.

The following adjustments have been made to accomodate the address
change.

* code to locate percpu gdt_page in head_64.S is updated to add the
  load address to the gdt_page offset.

* __per_cpu_load is used in places where access to the init data area
  is necessary.

* pda->data_offset is initialized soon after C code is entered as zero
  value doesn't work anymore.

This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/sections.h    |  2 +-
 include/asm-generic/vmlinux.lds.h | 51 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 79a7ff925bf..4ce48e87853 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
 extern char __init_begin[], __init_end[];
 extern char _sinittext[], _einittext[];
 extern char _end[];
-extern char __per_cpu_start[], __per_cpu_end[];
+extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c61fab1dd2f..fc2f55f2dcd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,12 +430,51 @@
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
 
-#define PERCPU(align)							\
-	. = ALIGN(align);						\
-	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
-	.data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {		\
+#define PERCPU_PROLOG(vaddr)						\
+	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
+	.data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__per_cpu_start) = .;
+
+#define PERCPU_EPILOG(phdr)						\
+		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
+	} phdr								\
+	. = __per_cpu_load + SIZEOF(.data.percpu);
+
+/**
+ * PERCPU_VADDR - define output section for percpu area
+ * @vaddr: explicit base address (optional)
+ * @phdr: destination PHDR (optional)
+ *
+ * Macro which expands to output section for percpu area.  If @vaddr
+ * is not blank, it specifies explicit base address and all percpu
+ * symbols will be offset from the given address.  If blank, @vaddr
+ * always equals @laddr + LOAD_OFFSET.
+ *
+ * @phdr defines the output PHDR to use if not blank.  Be warned that
+ * output PHDR is sticky.  If @phdr is specified, the next output
+ * section in the linker script will go there too.  @phdr should have
+ * a leading colon.
+ *
+ * This macro defines three symbols, __per_cpu_load, __per_cpu_start
+ * and __per_cpu_end.  The first one is the vaddr of loaded percpu
+ * init data.  __per_cpu_start equals @vaddr and __per_cpu_end is the
+ * end offset.
+ */
+#define PERCPU_VADDR(vaddr, phdr)					\
+	PERCPU_PROLOG(vaddr)						\
 		*(.data.percpu.page_aligned)				\
 		*(.data.percpu)						\
 		*(.data.percpu.shared_aligned)				\
-	}								\
-	VMLINUX_SYMBOL(__per_cpu_end) = .;
+	PERCPU_EPILOG(phdr)
+
+/**
+ * PERCPU - define output section for percpu area, simple version
+ * @align: required alignment
+ *
+ * Align to @align and outputs output section for percpu area.  This
+ * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
+ * __per_cpu_start will be identical.
+ */
+#define PERCPU(align)							\
+	. = ALIGN(align);						\
+	PERCPU_VADDR( , )
-- 
cgit v1.2.3


From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: fold pda into percpu area on SMP

[ Based on original patch from Christoph Lameter and Mike Travis. ]

Currently pdas and percpu areas are allocated separately.  %gs points
to local pda and percpu area can be reached using pda->data_offset.
This patch folds pda into percpu area.

Due to strange gcc requirement, pda needs to be at the beginning of
the percpu area so that pda->stack_canary is at %gs:40.  To achieve
this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is
added and used to reserve pda sized chunk at the start of the percpu
area.

After this change, for boot cpu, %gs first points to pda in the
data.init area and later during setup_per_cpu_areas() gets updated to
point to the actual pda.  This means that setup_per_cpu_areas() need
to reload %gs for CPU0 while clearing pda area for other cpus as cpu0
already has modified it when control reaches setup_per_cpu_areas().

This patch also removes now unnecessary get_local_pda() and its call
sites.

A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into
per cpu area" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fc2f55f2dcd..e53319cf29c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -441,9 +441,10 @@
 	. = __per_cpu_load + SIZEOF(.data.percpu);
 
 /**
- * PERCPU_VADDR - define output section for percpu area
+ * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
  * @vaddr: explicit base address (optional)
  * @phdr: destination PHDR (optional)
+ * @prealloc: the size of prealloc area
  *
  * Macro which expands to output section for percpu area.  If @vaddr
  * is not blank, it specifies explicit base address and all percpu
@@ -455,11 +456,33 @@
  * section in the linker script will go there too.  @phdr should have
  * a leading colon.
  *
+ * If @prealloc is non-zero, the specified number of bytes will be
+ * reserved at the start of percpu area.  As the prealloc area is
+ * likely to break alignment, this macro puts areas in increasing
+ * alignment order.
+ *
  * This macro defines three symbols, __per_cpu_load, __per_cpu_start
  * and __per_cpu_end.  The first one is the vaddr of loaded percpu
  * init data.  __per_cpu_start equals @vaddr and __per_cpu_end is the
  * end offset.
  */
+#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc)			\
+	PERCPU_PROLOG(vaddr)						\
+		. += prealloc;						\
+		*(.data.percpu)						\
+		*(.data.percpu.shared_aligned)				\
+		*(.data.percpu.page_aligned)				\
+	PERCPU_EPILOG(segment)
+
+/**
+ * PERCPU_VADDR - define output section for percpu area
+ * @vaddr: explicit base address (optional)
+ * @phdr: destination PHDR (optional)
+ *
+ * Macro which expands to output section for percpu area.  Mostly
+ * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
+ * using slighly different layout.
+ */
 #define PERCPU_VADDR(vaddr, phdr)					\
 	PERCPU_PROLOG(vaddr)						\
 		*(.data.percpu.page_aligned)				\
-- 
cgit v1.2.3


From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 15 Jan 2009 22:15:53 +0900
Subject: percpu: add optimized generic percpu accessors

It is an optimization and a cleanup, and adds the following new
generic percpu methods:

  percpu_read()
  percpu_write()
  percpu_add()
  percpu_sub()
  percpu_and()
  percpu_or()
  percpu_xor()

and implements support for them on x86. (other architectures will fall
back to a default implementation)

The advantage is that for example to read a local percpu variable,
instead of this sequence:

 return __get_cpu_var(var);

 ffffffff8102ca2b:	48 8b 14 fd 80 09 74 	mov    -0x7e8bf680(,%rdi,8),%rdx
 ffffffff8102ca32:	81
 ffffffff8102ca33:	48 c7 c0 d8 59 00 00 	mov    $0x59d8,%rax
 ffffffff8102ca3a:	48 8b 04 10          	mov    (%rax,%rdx,1),%rax

We can get a single instruction by using the optimized variants:

 return percpu_read(var);

 ffffffff8102ca3f:	65 48 8b 05 91 8f fd 	mov    %gs:0x7efd8f91(%rip),%rax

I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.

tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/percpu.h | 52 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c672eb..00f45ff081a 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void);
 #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
 					__typeof__(type) per_cpu_var(name)
 
+/*
+ * Optional methods for optimized non-lvalue per-cpu variable access.
+ *
+ * @var can be a percpu variable or a field of it and its size should
+ * equal char, int or long.  percpu_read() evaluates to a lvalue and
+ * all others to void.
+ *
+ * These operations are guaranteed to be atomic w.r.t. preemption.
+ * The generic versions use plain get/put_cpu_var().  Archs are
+ * encouraged to implement single-instruction alternatives which don't
+ * require preemption protection.
+ */
+#ifndef percpu_read
+# define percpu_read(var)						\
+  ({									\
+	typeof(per_cpu_var(var)) __tmp_var__;				\
+	__tmp_var__ = get_cpu_var(var);					\
+	put_cpu_var(var);						\
+	__tmp_var__;							\
+  })
+#endif
+
+#define __percpu_generic_to_op(var, val, op)				\
+do {									\
+	get_cpu_var(var) op val;					\
+	put_cpu_var(var);						\
+} while (0)
+
+#ifndef percpu_write
+# define percpu_write(var, val)		__percpu_generic_to_op(var, (val), =)
+#endif
+
+#ifndef percpu_add
+# define percpu_add(var, val)		__percpu_generic_to_op(var, (val), +=)
+#endif
+
+#ifndef percpu_sub
+# define percpu_sub(var, val)		__percpu_generic_to_op(var, (val), -=)
+#endif
+
+#ifndef percpu_and
+# define percpu_and(var, val)		__percpu_generic_to_op(var, (val), &=)
+#endif
+
+#ifndef percpu_or
+# define percpu_or(var, val)		__percpu_generic_to_op(var, (val), |=)
+#endif
+
+#ifndef percpu_xor
+# define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=)
+#endif
+
 #endif /* _ASM_GENERIC_PERCPU_H_ */
-- 
cgit v1.2.3


From 145cd30bac885dffad9db9d487baad07b68a3d04 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 17 Jan 2009 14:42:50 +0900
Subject: linker script: add missing VMLINUX_SYMBOL

The newly added PERCPU_*() macros define and use __per_cpu_load but
VMLINUX_SYMBOL() was missing from usages causing build failures on
archs where linker visible symbol is different from C symbols
(e.g. blackfin).

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e53319cf29c..aa6b9b1b30b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -432,13 +432,14 @@
 
 #define PERCPU_PROLOG(vaddr)						\
 	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
-	.data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) {		\
+	.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
+				- LOAD_OFFSET) {			\
 		VMLINUX_SYMBOL(__per_cpu_start) = .;
 
 #define PERCPU_EPILOG(phdr)						\
 		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
 	} phdr								\
-	. = __per_cpu_load + SIZEOF(.data.percpu);
+	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
 
 /**
  * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
-- 
cgit v1.2.3


From 0bd74fa8e29dcad98f7e8ffe01ec05fb3326abaf Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 12:21:27 +0900
Subject: percpu: refactor percpu.h

Impact: cleanup

Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first
section.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/vmlinux.lds.h |  1 +
 include/linux/percpu.h            | 41 ++++++++++++++++++++++-----------------
 2 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index aa6b9b1b30b..32bbf50d305 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -486,6 +486,7 @@
  */
 #define PERCPU_VADDR(vaddr, phdr)					\
 	PERCPU_PROLOG(vaddr)						\
+		*(.data.percpu.first)					\
 		*(.data.percpu.page_aligned)				\
 		*(.data.percpu)						\
 		*(.data.percpu.shared_aligned)				\
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9f2a3751873..0e24202b5a4 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,34 +9,39 @@
 #include <asm/percpu.h>
 
 #ifdef CONFIG_SMP
-#define DEFINE_PER_CPU(type, name)					\
-	__attribute__((__section__(".data.percpu")))			\
-	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+#define PER_CPU_BASE_SECTION ".data.percpu"
 
 #ifdef MODULE
-#define SHARED_ALIGNED_SECTION ".data.percpu"
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
 #else
-#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
+#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
 #endif
+#define PER_CPU_FIRST_SECTION ".first"
 
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
-	__attribute__((__section__(SHARED_ALIGNED_SECTION)))		\
-	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name		\
-	____cacheline_aligned_in_smp
+#else
+
+#define PER_CPU_BASE_SECTION ".data"
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
+#define PER_CPU_FIRST_SECTION ""
+
+#endif
 
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)			\
-	__attribute__((__section__(".data.percpu.page_aligned")))	\
+#define DEFINE_PER_CPU_SECTION(type, name, section)			\
+	__attribute__((__section__(PER_CPU_BASE_SECTION section)))	\
 	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#else
+
 #define DEFINE_PER_CPU(type, name)					\
-	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+	DEFINE_PER_CPU_SECTION(type, name, "")
 
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		      \
-	DEFINE_PER_CPU(type, name)
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
+	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+	____cacheline_aligned_in_smp
 
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)		      \
-	DEFINE_PER_CPU(type, name)
-#endif
+#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)				\
+	DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
+
+#define DEFINE_PER_CPU_FIRST(type, name)				\
+	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
 
 #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
 #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-- 
cgit v1.2.3


From 6b7c38d55587f43bcd2cbce3a98b1c0826982090 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 19 Jan 2009 12:21:28 +0900
Subject: linker script: kill PERCPU_VADDR_PREALLOC()

Impact: cleanup

With .data.percpu.first in place, PERCPU_VADDR_PREALLOC() is no longer
necessary.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 45 +++++++--------------------------------
 1 file changed, 8 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 32bbf50d305..53e21f36a80 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,22 +430,10 @@
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
 
-#define PERCPU_PROLOG(vaddr)						\
-	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
-	.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
-				- LOAD_OFFSET) {			\
-		VMLINUX_SYMBOL(__per_cpu_start) = .;
-
-#define PERCPU_EPILOG(phdr)						\
-		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
-	} phdr								\
-	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
-
 /**
- * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
+ * PERCPU_VADDR - define output section for percpu area
  * @vaddr: explicit base address (optional)
  * @phdr: destination PHDR (optional)
- * @prealloc: the size of prealloc area
  *
  * Macro which expands to output section for percpu area.  If @vaddr
  * is not blank, it specifies explicit base address and all percpu
@@ -457,40 +445,23 @@
  * section in the linker script will go there too.  @phdr should have
  * a leading colon.
  *
- * If @prealloc is non-zero, the specified number of bytes will be
- * reserved at the start of percpu area.  As the prealloc area is
- * likely to break alignment, this macro puts areas in increasing
- * alignment order.
- *
  * This macro defines three symbols, __per_cpu_load, __per_cpu_start
  * and __per_cpu_end.  The first one is the vaddr of loaded percpu
  * init data.  __per_cpu_start equals @vaddr and __per_cpu_end is the
  * end offset.
  */
-#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc)			\
-	PERCPU_PROLOG(vaddr)						\
-		. += prealloc;						\
-		*(.data.percpu)						\
-		*(.data.percpu.shared_aligned)				\
-		*(.data.percpu.page_aligned)				\
-	PERCPU_EPILOG(segment)
-
-/**
- * PERCPU_VADDR - define output section for percpu area
- * @vaddr: explicit base address (optional)
- * @phdr: destination PHDR (optional)
- *
- * Macro which expands to output section for percpu area.  Mostly
- * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
- * using slighly different layout.
- */
 #define PERCPU_VADDR(vaddr, phdr)					\
-	PERCPU_PROLOG(vaddr)						\
+	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
+	.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
+				- LOAD_OFFSET) {			\
+		VMLINUX_SYMBOL(__per_cpu_start) = .;			\
 		*(.data.percpu.first)					\
 		*(.data.percpu.page_aligned)				\
 		*(.data.percpu)						\
 		*(.data.percpu.shared_aligned)				\
-	PERCPU_EPILOG(phdr)
+		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
+	} phdr								\
+	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
 
 /**
  * PERCPU - define output section for percpu area, simple version
-- 
cgit v1.2.3


From 5a611268b69f05262936dd177205acbce4471358 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 26 Jan 2009 08:44:05 -0500
Subject: generic, x86: fix __per_cpu_load relocation

This patch fixes this linker error:

 WARNING: Absolute relocations present
 Offset     Info     Type     Sym.Value Sym.Name
 c0a4e07d 00e78001   R_386_32 c0ab0000  __per_cpu_load

Now, __per_cpu_load is a section-relative symbol:

 c0aa4000 D __per_cpu_load
 c0aa4000 A __per_cpu_load_abs

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 53e21f36a80..f3180a85c66 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -451,17 +451,18 @@
  * end offset.
  */
 #define PERCPU_VADDR(vaddr, phdr)					\
-	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
-	.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
+	VMLINUX_SYMBOL(__per_cpu_load_abs) = .;				\
+	.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs)	\
 				- LOAD_OFFSET) {			\
 		VMLINUX_SYMBOL(__per_cpu_start) = .;			\
+		VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\
 		*(.data.percpu.first)					\
 		*(.data.percpu.page_aligned)				\
 		*(.data.percpu)						\
 		*(.data.percpu.shared_aligned)				\
 		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
 	} phdr								\
-	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
+	. = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu);
 
 /**
  * PERCPU - define output section for percpu area, simple version
-- 
cgit v1.2.3