aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-14 09:50:10 +0100
committerIngo Molnar <mingo@elte.hu>2009-03-14 09:50:10 +0100
commitc550033ced484d8d333bc1edc0a482728680e689 (patch)
treef5e10a2879a2a446be8d4aefb385cd6845c19c72
parenta98fe7f3425c6b4e90de16f8da63b0429a8fed08 (diff)
parent7a46c594bf7f1f2eeb1e12d4b857d5f581957a92 (diff)
Merge branch 'core/percpu' into x86/core
-rw-r--r--arch/arm/kernel/vmlinux.lds.S1
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S12
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/kernel/setup_percpu.c63
-rw-r--r--include/linux/percpu.h6
-rw-r--r--kernel/sched.c5
-rw-r--r--mm/allocpercpu.c2
-rw-r--r--mm/percpu.c130
9 files changed, 138 insertions, 98 deletions
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 85598f7da40..1602373e539 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -64,6 +64,7 @@ SECTIONS
__initramfs_end = .;
#endif
. = ALIGN(4096);
+ __per_cpu_load = .;
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index f45e4e508ec..3765efc5f96 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -213,17 +213,9 @@ SECTIONS
{ *(.data.cacheline_aligned) }
/* Per-cpu data: */
- percpu : { } :percpu
. = ALIGN(PERCPU_PAGE_SIZE);
- __phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
- {
- __per_cpu_start = .;
- *(.data.percpu.page_aligned)
- *(.data.percpu)
- *(.data.percpu.shared_aligned)
- __per_cpu_end = .;
- }
+ PERCPU_VADDR(PERCPU_ADDR, :percpu)
+ __phys_per_cpu_start = __per_cpu_load;
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
* into percpu page size
*/
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 295ccc5e86b..67f07f45338 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -181,14 +181,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
- . = ALIGN(PAGE_SIZE);
- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
- __per_cpu_start = .;
- *(.data.percpu.page_aligned)
- *(.data.percpu)
- *(.data.percpu.shared_aligned)
- __per_cpu_end = .;
- }
+ PERCPU(PAGE_SIZE)
. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 8f1d2fbec1d..aee103b26d0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -43,14 +43,6 @@
#else /* ...!ASSEMBLY */
#include <linux/stringify.h>
-#include <asm/sections.h>
-
-#define __addr_to_pcpu_ptr(addr) \
- (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
- + (unsigned long)__per_cpu_start)
-#define __pcpu_ptr_to_addr(ptr) \
- (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
- - (unsigned long)__per_cpu_start)
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efa615f2bf4..400331b50a5 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -233,8 +233,8 @@ proceed:
"%zu bytes\n", vm.addr, static_size);
ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
- PERCPU_FIRST_CHUNK_RESERVE,
- PMD_SIZE, dyn_size, vm.addr, NULL);
+ PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
+ PMD_SIZE, vm.addr, NULL);
goto out_free_ar;
enomem:
@@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
* Embedding allocator
*
* The first chunk is sized to just contain the static area plus
- * module and dynamic reserves, and allocated as a contiguous area
- * using bootmem allocator and used as-is without being mapped into
- * vmalloc area. This enables the first chunk to piggy back on the
- * linear physical PMD mapping and doesn't add any additional pressure
- * to TLB. Note that if the needed size is smaller than the minimum
- * unit size, the leftover is returned to the bootmem allocator.
+ * module and dynamic reserves and embedded into linear physical
+ * mapping so that it can use PMD mapping without additional TLB
+ * pressure.
*/
-static void *pcpue_ptr __initdata;
-static size_t pcpue_size __initdata;
-static size_t pcpue_unit_size __initdata;
-
-static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
-{
- size_t off = (size_t)pageno << PAGE_SHIFT;
-
- if (off >= pcpue_size)
- return NULL;
-
- return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
-}
-
static ssize_t __init setup_pcpu_embed(size_t static_size)
{
- unsigned int cpu;
- size_t dyn_size;
+ size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
/*
* If large page isn't supported, there's no benefit in doing
@@ -291,33 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
if (!cpu_has_pse || pcpu_need_numa())
return -EINVAL;
- /* allocate and copy */
- pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
- PERCPU_DYNAMIC_RESERVE);
- pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
- dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
-
- pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
- PAGE_SIZE);
- if (!pcpue_ptr)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu) {
- void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
-
- free_bootmem(__pa(ptr + pcpue_size),
- pcpue_unit_size - pcpue_size);
- memcpy(ptr, __per_cpu_load, static_size);
- }
-
- /* we're ready, commit */
- pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
- pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
-
- return pcpu_setup_first_chunk(pcpue_get_page, static_size,
- PERCPU_FIRST_CHUNK_RESERVE,
- pcpue_unit_size, dyn_size,
- pcpue_ptr, NULL);
+ return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
+ reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
}
/*
@@ -375,8 +332,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
pcpu4k_nr_static_pages, static_size);
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
- PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL,
- pcpu4k_populate_pte);
+ PERCPU_FIRST_CHUNK_RESERVE, -1,
+ -1, NULL, pcpu4k_populate_pte);
goto out_free_ar;
enomem:
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 54a968b4b92..ee5615d6521 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -107,10 +107,14 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr);
extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size,
- ssize_t unit_size, ssize_t dyn_size,
+ ssize_t dyn_size, ssize_t unit_size,
void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn);
+extern ssize_t __init pcpu_embed_first_chunk(
+ size_t static_size, size_t reserved_size,
+ ssize_t dyn_size, ssize_t unit_size);
+
/*
* Use this to get to a cpu's version of the per-cpu object
* dynamically allocated. Non-atomic access to the current CPU's
diff --git a/kernel/sched.c b/kernel/sched.c
index 0a76d0b6f21..61e63562f27 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9599,10 +9599,11 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
cpu = task_cpu(tsk);
ca = task_ca(tsk);
- for (; ca; ca = ca->parent) {
+ do {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
- }
+ ca = ca->parent;
+ } while (ca);
}
struct cgroup_subsys cpuacct_subsys = {
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 3653c570232..1882923bc70 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -120,7 +120,7 @@ void *__alloc_percpu(size_t size, size_t align)
* on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used.
*/
- WARN_ON_ONCE(align > __alignof__(unsigned long long));
+ WARN_ON_ONCE(align > SMP_CACHE_BYTES);
if (unlikely(!pdata))
return NULL;
diff --git a/mm/percpu.c b/mm/percpu.c
index bfe6a3afaf4..1aa5d8fbca1 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -46,7 +46,8 @@
* - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
*
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
- * regular address to percpu pointer and back
+ * regular address to percpu pointer and back if they need to be
+ * different from the default
*
* - use pcpu_setup_first_chunk() during percpu area initialization to
* setup the first chunk containing the kernel static percpu area
@@ -67,11 +68,24 @@
#include <linux/workqueue.h>
#include <asm/cacheflush.h>
+#include <asm/sections.h>
#include <asm/tlbflush.h>
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
+/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
+#ifndef __addr_to_pcpu_ptr
+#define __addr_to_pcpu_ptr(addr) \
+ (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ + (unsigned long)__per_cpu_start)
+#endif
+#ifndef __pcpu_ptr_to_addr
+#define __pcpu_ptr_to_addr(ptr) \
+ (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
+ - (unsigned long)__per_cpu_start)
+#endif
+
struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */
struct rb_node rb_node; /* key is chunk->vm->addr */
@@ -1013,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu);
* @get_page_fn: callback to fetch page pointer
* @static_size: the size of static percpu area in bytes
* @reserved_size: the size of reserved percpu area in bytes
- * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @base_addr: mapped address, NULL for auto
* @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
*
@@ -1039,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
* limited offset range for symbol relocations to guarantee module
* percpu symbols fall inside the relocatable range.
*
+ * @dyn_size, if non-negative, determines the number of bytes
+ * available for dynamic allocation in the first chunk. Specifying
+ * non-negative value makes percpu leave alone the area beyond
+ * @static_size + @reserved_size + @dyn_size.
+ *
* @unit_size, if non-negative, specifies unit size and must be
* aligned to PAGE_SIZE and equal to or larger than @static_size +
- * @reserved_size + @dyn_size.
- *
- * @dyn_size, if non-negative, limits the number of bytes available
- * for dynamic allocation in the first chunk. Specifying non-negative
- * value make percpu leave alone the area beyond @static_size +
- * @reserved_size + @dyn_size.
+ * @reserved_size + if non-negative, @dyn_size.
*
* Non-null @base_addr means that the caller already allocated virtual
* region for the first chunk and mapped it. percpu must not mess
@@ -1069,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
*/
size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size,
- ssize_t unit_size, ssize_t dyn_size,
+ ssize_t dyn_size, ssize_t unit_size,
void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn)
{
static struct vm_struct first_vm;
static int smap[2], dmap[2];
+ size_t size_sum = static_size + reserved_size +
+ (dyn_size >= 0 ? dyn_size : 0);
struct pcpu_chunk *schunk, *dchunk = NULL;
unsigned int cpu;
int nr_pages;
@@ -1085,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
BUG_ON(!static_size);
if (unit_size >= 0) {
- BUG_ON(unit_size < static_size + reserved_size +
- (dyn_size >= 0 ? dyn_size : 0));
+ BUG_ON(unit_size < size_sum);
BUG_ON(unit_size & ~PAGE_MASK);
- } else {
- BUG_ON(dyn_size >= 0);
+ BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE);
+ } else
BUG_ON(base_addr);
- }
BUG_ON(base_addr && populate_pte_fn);
if (unit_size >= 0)
pcpu_unit_pages = unit_size >> PAGE_SHIFT;
else
pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
- PFN_UP(static_size + reserved_size));
+ PFN_UP(size_sum));
pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
@@ -1224,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
return pcpu_unit_size;
}
+
+/*
+ * Embedding first chunk setup helper.
+ */
+static void *pcpue_ptr __initdata;
+static size_t pcpue_size __initdata;
+static size_t pcpue_unit_size __initdata;
+
+static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
+{
+ size_t off = (size_t)pageno << PAGE_SHIFT;
+
+ if (off >= pcpue_size)
+ return NULL;
+
+ return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
+}
+
+/**
+ * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
+ * @static_size: the size of static percpu area in bytes
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
+ *
+ * This is a helper to ease setting up embedded first percpu chunk and
+ * can be called where pcpu_setup_first_chunk() is expected.
+ *
+ * If this function is used to setup the first chunk, it is allocated
+ * as a contiguous area using bootmem allocator and used as-is without
+ * being mapped into vmalloc area. This enables the first chunk to
+ * piggy back on the linear physical mapping which often uses larger
+ * page size.
+ *
+ * When @dyn_size is positive, dynamic area might be larger than
+ * specified to fill page alignment. Also, when @dyn_size is auto,
+ * @dyn_size does not fill the whole first chunk but only what's
+ * necessary for page alignment after static and reserved areas.
+ *
+ * If the needed size is smaller than the minimum or specified unit
+ * size, the leftover is returned to the bootmem allocator.
+ *
+ * RETURNS:
+ * The determined pcpu_unit_size which can be used to initialize
+ * percpu access on success, -errno on failure.
+ */
+ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
+ ssize_t dyn_size, ssize_t unit_size)
+{
+ unsigned int cpu;
+
+ /* determine parameters and allocate */
+ pcpue_size = PFN_ALIGN(static_size + reserved_size +
+ (dyn_size >= 0 ? dyn_size : 0));
+ if (dyn_size != 0)
+ dyn_size = pcpue_size - static_size - reserved_size;
+
+ if (unit_size >= 0) {
+ BUG_ON(unit_size < pcpue_size);
+ pcpue_unit_size = unit_size;
+ } else
+ pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
+
+ pcpue_ptr = __alloc_bootmem_nopanic(
+ num_possible_cpus() * pcpue_unit_size,
+ PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+ if (!pcpue_ptr)
+ return -ENOMEM;
+
+ /* return the leftover and copy */
+ for_each_possible_cpu(cpu) {
+ void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
+
+ free_bootmem(__pa(ptr + pcpue_size),
+ pcpue_unit_size - pcpue_size);
+ memcpy(ptr, __per_cpu_load, static_size);
+ }
+
+ /* we're ready, commit */
+ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
+ pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
+
+ return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+ reserved_size, dyn_size,
+ pcpue_unit_size, pcpue_ptr, NULL);
+}