From 2dbb51c49f4fecb8330e43247a0edfbc4b2b8974 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:52 -0700 Subject: mm: make defensive checks around PFN values registered for memory usage There are a number of different views to how much memory is currently active. There is the arch-independent zone-sizing view, the bootmem allocator and memory models view. Architectures register this information at different times and is not necessarily in sync particularly with respect to some SPARSEMEM limitations. This patch introduces mminit_validate_memmodel_limits() which is able to validate and correct PFN ranges with respect to the memory model. It is only SPARSEMEM that currently validates itself. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 8d9f60e06f6..9f4bbc5da73 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -91,6 +91,7 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat, bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize; + mminit_validate_memmodel_limits(&start, &end); bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); bdata->node_boot_start = PFN_PHYS(start); bdata->node_low_pfn = end; -- cgit v1.2.3 From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:55 -0700 Subject: mm: move bootmem descriptors definition to a single place There are a lot of places that define either a single bootmem descriptor or an array of them. Use only one central array with MAX_NUMNODES items instead. Signed-off-by: Johannes Weiner Acked-by: Ralf Baechle Cc: Ingo Molnar Cc: Richard Henderson Cc: Russell King Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Kyle McMartin Cc: Paul Mackerras Cc: Paul Mundt Cc: David S. Miller Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 9f4bbc5da73..35b3cb66703 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -36,6 +36,8 @@ static LIST_HEAD(bdata_list); unsigned long saved_max_pfn; #endif +bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; + /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages(unsigned long pages) { -- cgit v1.2.3 From 6b312c0e6e2f44b020e12953d1dd37eed60e3609 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:58 -0700 Subject: mm: fix free_all_bootmem_core alignment check The check for node_boot_start is bogus because we start freeing at the corresponding pfn. So check if the pfn is properly aligned instead in a more readable way and adjust the documentation. Also remove an unneeded accounting variable. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 35b3cb66703..319a79bce7c 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -377,7 +377,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) struct page *page; unsigned long pfn; bootmem_data_t *bdata = pgdat->bdata; - unsigned long i, count, total = 0; + unsigned long i, count; unsigned long idx; unsigned long *map; int gofast = 0; @@ -389,10 +389,13 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) pfn = PFN_DOWN(bdata->node_boot_start); idx = bdata->node_low_pfn - pfn; map = bdata->node_bootmem_map; - /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ - if (bdata->node_boot_start == 0 || - ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG)) + /* + * Check if we are aligned to BITS_PER_LONG pages. If so, we might + * be able to free page orders of that size at once. + */ + if (!(pfn & (BITS_PER_LONG-1))) gofast = 1; + for (i = 0; i < idx; ) { unsigned long v = ~map[i / BITS_PER_LONG]; @@ -420,23 +423,19 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } pfn += BITS_PER_LONG; } - total += count; /* * Now free the allocator bitmap itself, it's not * needed anymore: */ page = virt_to_page(bdata->node_bootmem_map); - count = 0; idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; - for (i = 0; i < idx; i++, page++) { + for (i = 0; i < idx; i++, page++) __free_pages_bootmem(page, 0); - count++; - } - total += count; + count += i; bdata->node_bootmem_map = NULL; - return total; + return count; } unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, -- cgit v1.2.3 From 8ae04463077324ed9f6b04ab3a5b17ae1ee4dd35 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:59 -0700 Subject: mm: normalize internal argument passing of bootmem data All _core functions only need the bootmem data, not the whole node descriptor. Adjust the two functions that take the node descriptor unneededly. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 319a79bce7c..251c66c5d96 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -87,10 +87,9 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata) /* * Called once to set up the allocator itself. */ -static unsigned long __init init_bootmem_core(pg_data_t *pgdat, +static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, unsigned long mapstart, unsigned long start, unsigned long end) { - bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize; mminit_validate_memmodel_limits(&start, &end); @@ -372,11 +371,10 @@ found: return ret; } -static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) +static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { struct page *page; unsigned long pfn; - bootmem_data_t *bdata = pgdat->bdata; unsigned long i, count; unsigned long idx; unsigned long *map; @@ -441,7 +439,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) { - return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); + return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); } int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, @@ -466,14 +464,14 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { register_page_bootmem_info_node(pgdat); - return free_all_bootmem_core(pgdat); + return free_all_bootmem_core(pgdat->bdata); } unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; min_low_pfn = start; - return init_bootmem_core(NODE_DATA(0), start, 0, pages); + return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE @@ -504,7 +502,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) unsigned long __init free_all_bootmem(void) { - return free_all_bootmem_core(NODE_DATA(0)); + return free_all_bootmem_core(NODE_DATA(0)->bdata); } void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, -- cgit v1.2.3 From ffc6421f0720f433b5b35b89ff56e998eabff93b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:59 -0700 Subject: mm: unexport __alloc_bootmem_core() This function has no external callers, so unexport it. Also fix its naming inconsistency. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 251c66c5d96..4bc6ae2fbaa 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -234,9 +234,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, * * NOTE: This function is _not_ reentrant. */ -void * __init -__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal, unsigned long limit) +static void * __init +alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, + unsigned long align, unsigned long goal, unsigned long limit) { unsigned long areasize, preferred; unsigned long i, start = 0, incr, eidx, end_pfn; @@ -245,7 +245,7 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, void *node_bootmem_map; if (!size) { - printk("__alloc_bootmem_core(): zero-sized request\n"); + printk("alloc_bootmem_core(): zero-sized request\n"); BUG(); } BUG_ON(align & (align-1)); @@ -512,7 +512,7 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, void *ptr; list_for_each_entry(bdata, &bdata_list, list) { - ptr = __alloc_bootmem_core(bdata, size, align, goal, 0); + ptr = alloc_bootmem_core(bdata, size, align, goal, 0); if (ptr) return ptr; } @@ -540,7 +540,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); + ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; @@ -559,8 +559,8 @@ void * __init alloc_bootmem_section(unsigned long size, goal = PFN_PHYS(pfn); limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1; pgdat = NODE_DATA(early_pfn_to_nid(pfn)); - ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal, - limit); + ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal, + limit); if (!ptr) return NULL; @@ -589,8 +589,8 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void *ptr; list_for_each_entry(bdata, &bdata_list, list) { - ptr = __alloc_bootmem_core(bdata, size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); + ptr = alloc_bootmem_core(bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); if (ptr) return ptr; } @@ -606,6 +606,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return __alloc_bootmem_core(pgdat->bdata, size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); + return alloc_bootmem_core(pgdat->bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); } -- cgit v1.2.3 From b54bbf7b81170f03597c17dd0b559e3006bc9868 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:45 -0700 Subject: mm: introduce non panic alloc_bootmem Straight forward variant of the existing __alloc_bootmem_node, only subsequent patch when allocating giant hugepages at boot -- don't want to panic if we can't allocate as many as the user asked for. Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 4bc6ae2fbaa..9ac972535ff 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -578,6 +578,18 @@ void * __init alloc_bootmem_section(unsigned long size, } #endif +void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ + void *ptr; + + ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); + if (ptr) + return ptr; + + return __alloc_bootmem_nopanic(size, align, goal); +} + #ifndef ARCH_LOW_ADDRESS_LIMIT #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif -- cgit v1.2.3 From 223e8dc9249c9e15f6c8b638d73fcad78ccb0a88 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:00 -0700 Subject: bootmem: reorder code to match new bootmem structure This only reorders functions so that further patches will be easier to read. No code changed. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 356 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 177 insertions(+), 179 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 9ac972535ff..24eacf52c50 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -38,6 +38,19 @@ unsigned long saved_max_pfn; bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; +/* + * Given an initialised bdata, it returns the size of the boot bitmap + */ +static unsigned long __init get_mapsize(bootmem_data_t *bdata) +{ + unsigned long mapsize; + unsigned long start = PFN_DOWN(bdata->node_boot_start); + unsigned long end = bdata->node_low_pfn; + + mapsize = ((end - start) + 7) / 8; + return ALIGN(mapsize, sizeof(long)); +} + /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages(unsigned long pages) { @@ -71,19 +84,6 @@ static void __init link_bootmem(bootmem_data_t *bdata) list_add_tail(&bdata->list, &bdata_list); } -/* - * Given an initialised bdata, it returns the size of the boot bitmap - */ -static unsigned long __init get_mapsize(bootmem_data_t *bdata) -{ - unsigned long mapsize; - unsigned long start = PFN_DOWN(bdata->node_boot_start); - unsigned long end = bdata->node_low_pfn; - - mapsize = ((end - start) + 7) / 8; - return ALIGN(mapsize, sizeof(long)); -} - /* * Called once to set up the allocator itself. */ @@ -108,6 +108,146 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, return mapsize; } +unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, + unsigned long startpfn, unsigned long endpfn) +{ + return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); +} + +unsigned long __init init_bootmem(unsigned long start, unsigned long pages) +{ + max_low_pfn = pages; + min_low_pfn = start; + return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); +} + +static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) +{ + struct page *page; + unsigned long pfn; + unsigned long i, count; + unsigned long idx; + unsigned long *map; + int gofast = 0; + + BUG_ON(!bdata->node_bootmem_map); + + count = 0; + /* first extant page of the node */ + pfn = PFN_DOWN(bdata->node_boot_start); + idx = bdata->node_low_pfn - pfn; + map = bdata->node_bootmem_map; + /* + * Check if we are aligned to BITS_PER_LONG pages. If so, we might + * be able to free page orders of that size at once. + */ + if (!(pfn & (BITS_PER_LONG-1))) + gofast = 1; + + for (i = 0; i < idx; ) { + unsigned long v = ~map[i / BITS_PER_LONG]; + + if (gofast && v == ~0UL) { + int order; + + page = pfn_to_page(pfn); + count += BITS_PER_LONG; + order = ffs(BITS_PER_LONG) - 1; + __free_pages_bootmem(page, order); + i += BITS_PER_LONG; + page += BITS_PER_LONG; + } else if (v) { + unsigned long m; + + page = pfn_to_page(pfn); + for (m = 1; m && i < idx; m<<=1, page++, i++) { + if (v & m) { + count++; + __free_pages_bootmem(page, 0); + } + } + } else { + i += BITS_PER_LONG; + } + pfn += BITS_PER_LONG; + } + + /* + * Now free the allocator bitmap itself, it's not + * needed anymore: + */ + page = virt_to_page(bdata->node_bootmem_map); + idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; + for (i = 0; i < idx; i++, page++) + __free_pages_bootmem(page, 0); + count += i; + bdata->node_bootmem_map = NULL; + + return count; +} + +unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) +{ + register_page_bootmem_info_node(pgdat); + return free_all_bootmem_core(pgdat->bdata); +} + +unsigned long __init free_all_bootmem(void) +{ + return free_all_bootmem_core(NODE_DATA(0)->bdata); +} + +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) +{ + unsigned long sidx, eidx; + unsigned long i; + + BUG_ON(!size); + + /* out range */ + if (addr + size < bdata->node_boot_start || + PFN_DOWN(addr) > bdata->node_low_pfn) + return; + /* + * round down end of usable mem, partially free pages are + * considered reserved. + */ + + if (addr >= bdata->node_boot_start && addr < bdata->last_success) + bdata->last_success = addr; + + /* + * Round up to index to the range. + */ + if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start)) + sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); + else + sidx = 0; + + eidx = PFN_DOWN(addr + size - bdata->node_boot_start); + if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) + eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + + for (i = sidx; i < eidx; i++) { + if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) + BUG(); + } +} + +void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) +{ + free_bootmem_core(pgdat->bdata, physaddr, size); +} + +void __init free_bootmem(unsigned long addr, unsigned long size) +{ + bootmem_data_t *bdata; + list_for_each_entry(bdata, &bdata_list, list) + free_bootmem_core(bdata, addr, size); +} + /* * Marks a particular physical memory range as unallocatable. Usable RAM * might be used for boot-time allocations - or it might get added @@ -183,43 +323,36 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, - unsigned long size) +int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size, int flags) { - unsigned long sidx, eidx; - unsigned long i; - - BUG_ON(!size); - - /* out range */ - if (addr + size < bdata->node_boot_start || - PFN_DOWN(addr) > bdata->node_low_pfn) - return; - /* - * round down end of usable mem, partially free pages are - * considered reserved. - */ - - if (addr >= bdata->node_boot_start && addr < bdata->last_success) - bdata->last_success = addr; + int ret; - /* - * Round up to index to the range. - */ - if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start)) - sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); - else - sidx = 0; + ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); + if (ret < 0) + return -ENOMEM; + reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); + return 0; +} - eidx = PFN_DOWN(addr + size - bdata->node_boot_start); - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE +int __init reserve_bootmem(unsigned long addr, unsigned long size, + int flags) +{ + bootmem_data_t *bdata; + int ret; - for (i = sidx; i < eidx; i++) { - if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) - BUG(); + list_for_each_entry(bdata, &bdata_list, list) { + ret = can_reserve_bootmem_core(bdata, addr, size, flags); + if (ret < 0) + return ret; } + list_for_each_entry(bdata, &bdata_list, list) + reserve_bootmem_core(bdata, addr, size, flags); + + return 0; } +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ /* * We 'merge' subsequent allocations to save space. We might 'lose' @@ -371,140 +504,6 @@ found: return ret; } -static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) -{ - struct page *page; - unsigned long pfn; - unsigned long i, count; - unsigned long idx; - unsigned long *map; - int gofast = 0; - - BUG_ON(!bdata->node_bootmem_map); - - count = 0; - /* first extant page of the node */ - pfn = PFN_DOWN(bdata->node_boot_start); - idx = bdata->node_low_pfn - pfn; - map = bdata->node_bootmem_map; - /* - * Check if we are aligned to BITS_PER_LONG pages. If so, we might - * be able to free page orders of that size at once. - */ - if (!(pfn & (BITS_PER_LONG-1))) - gofast = 1; - - for (i = 0; i < idx; ) { - unsigned long v = ~map[i / BITS_PER_LONG]; - - if (gofast && v == ~0UL) { - int order; - - page = pfn_to_page(pfn); - count += BITS_PER_LONG; - order = ffs(BITS_PER_LONG) - 1; - __free_pages_bootmem(page, order); - i += BITS_PER_LONG; - page += BITS_PER_LONG; - } else if (v) { - unsigned long m; - - page = pfn_to_page(pfn); - for (m = 1; m && i < idx; m<<=1, page++, i++) { - if (v & m) { - count++; - __free_pages_bootmem(page, 0); - } - } - } else { - i += BITS_PER_LONG; - } - pfn += BITS_PER_LONG; - } - - /* - * Now free the allocator bitmap itself, it's not - * needed anymore: - */ - page = virt_to_page(bdata->node_bootmem_map); - idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; - for (i = 0; i < idx; i++, page++) - __free_pages_bootmem(page, 0); - count += i; - bdata->node_bootmem_map = NULL; - - return count; -} - -unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, - unsigned long startpfn, unsigned long endpfn) -{ - return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); -} - -int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, - unsigned long size, int flags) -{ - int ret; - - ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); - if (ret < 0) - return -ENOMEM; - reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); - - return 0; -} - -void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, - unsigned long size) -{ - free_bootmem_core(pgdat->bdata, physaddr, size); -} - -unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) -{ - register_page_bootmem_info_node(pgdat); - return free_all_bootmem_core(pgdat->bdata); -} - -unsigned long __init init_bootmem(unsigned long start, unsigned long pages) -{ - max_low_pfn = pages; - min_low_pfn = start; - return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); -} - -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -int __init reserve_bootmem(unsigned long addr, unsigned long size, - int flags) -{ - bootmem_data_t *bdata; - int ret; - - list_for_each_entry(bdata, &bdata_list, list) { - ret = can_reserve_bootmem_core(bdata, addr, size, flags); - if (ret < 0) - return ret; - } - list_for_each_entry(bdata, &bdata_list, list) - reserve_bootmem_core(bdata, addr, size, flags); - - return 0; -} -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ - -void __init free_bootmem(unsigned long addr, unsigned long size) -{ - bootmem_data_t *bdata; - list_for_each_entry(bdata, &bdata_list, list) - free_bootmem_core(bdata, addr, size); -} - -unsigned long __init free_all_bootmem(void) -{ - return free_all_bootmem_core(NODE_DATA(0)->bdata); -} - void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) { @@ -534,7 +533,6 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return NULL; } - void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { -- cgit v1.2.3 From 57cfc29efac6670355ee0e107c8dbae8237d406b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:00 -0700 Subject: bootmem: clean up bootmem.c file header Change the description, move a misplaced comment about the allocator itself and add me to the list of copyright holders. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 24eacf52c50..286e12c536a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -1,12 +1,12 @@ /* - * linux/mm/bootmem.c + * bootmem - A boot-time physical memory allocator and configurator * * Copyright (C) 1999 Ingo Molnar - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 + * 1999 Kanoj Sarcar, SGI + * 2008 Johannes Weiner * - * simple boot-time physical memory area allocator and - * free memory collector. It's used to deal with reserved - * system memory and memory holes as well. + * Access to this subsystem has to be serialized externally (which is true + * for the boot process anyway). */ #include #include @@ -19,10 +19,6 @@ #include "internal.h" -/* - * Access to this subsystem has to be serialized externally. (this is - * true for the boot process anyway) - */ unsigned long max_low_pfn; unsigned long min_low_pfn; unsigned long max_pfn; -- cgit v1.2.3 From a66fd7daec1f40c1f0eac466f0da9206b615fe2a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:01 -0700 Subject: bootmem: add documentation to API functions Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 286e12c536a..105ad4cff2e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -47,7 +47,10 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata) return ALIGN(mapsize, sizeof(long)); } -/* return the number of _pages_ that will be allocated for the boot bitmap */ +/** + * bootmem_bootmap_pages - calculate bitmap size in pages + * @pages: number of pages the bitmap has to represent + */ unsigned long __init bootmem_bootmap_pages(unsigned long pages) { unsigned long mapsize; @@ -104,12 +107,28 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, return mapsize; } +/** + * init_bootmem_node - register a node as boot memory + * @pgdat: node to register + * @freepfn: pfn where the bitmap for this node is to be placed + * @startpfn: first pfn on the node + * @endpfn: first pfn after the node + * + * Returns the number of bytes needed to hold the bitmap for this node. + */ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) { return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); } +/** + * init_bootmem - register boot memory + * @start: pfn where the bitmap is to be placed + * @pages: number of available physical pages + * + * Returns the number of bytes needed to hold the bitmap. + */ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; @@ -182,12 +201,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) return count; } +/** + * free_all_bootmem_node - release a node's free pages to the buddy allocator + * @pgdat: node to be released + * + * Returns the number of pages actually released. + */ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { register_page_bootmem_info_node(pgdat); return free_all_bootmem_core(pgdat->bdata); } +/** + * free_all_bootmem - release free pages to the buddy allocator + * + * Returns the number of pages actually released. + */ unsigned long __init free_all_bootmem(void) { return free_all_bootmem_core(NODE_DATA(0)->bdata); @@ -231,12 +261,32 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, } } +/** + * free_bootmem_node - mark a page range as usable + * @pgdat: node the range resides on + * @physaddr: starting address of the range + * @size: size of the range in bytes + * + * Partial pages will be considered reserved and left as they are. + * + * Only physical pages that actually reside on @pgdat are marked. + */ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } +/** + * free_bootmem - mark a page range as usable + * @addr: starting address of the range + * @size: size of the range in bytes + * + * Partial pages will be considered reserved and left as they are. + * + * All physical pages within the range are marked, no matter what + * node they reside on. + */ void __init free_bootmem(unsigned long addr, unsigned long size) { bootmem_data_t *bdata; @@ -319,6 +369,17 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, } } +/** + * reserve_bootmem_node - mark a page range as reserved + * @pgdat: node the range resides on + * @physaddr: starting address of the range + * @size: size of the range in bytes + * @flags: reservation flags (see linux/bootmem.h) + * + * Partial pages will be reserved. + * + * Only physical pages that actually reside on @pgdat are marked. + */ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags) { @@ -332,6 +393,17 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE +/** + * reserve_bootmem - mark a page range as usable + * @addr: starting address of the range + * @size: size of the range in bytes + * @flags: reservation flags (see linux/bootmem.h) + * + * Partial pages will be reserved. + * + * All physical pages within the range are marked, no matter what + * node they reside on. + */ int __init reserve_bootmem(unsigned long addr, unsigned long size, int flags) { @@ -500,6 +572,19 @@ found: return ret; } +/** + * __alloc_bootmem_nopanic - allocate boot memory without panicking + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may happen on any node in the system. + * + * Returns NULL on failure. + */ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) { @@ -514,6 +599,19 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, return NULL; } +/** + * __alloc_bootmem - allocate boot memory + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may happen on any node in the system. + * + * The function panics if the request can not be satisfied. + */ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { @@ -529,6 +627,21 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return NULL; } +/** + * __alloc_bootmem_node - allocate boot memory from a specific node + * @pgdat: node to allocate from + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may fall back to any node in the system if the specified node + * can not hold the requested memory. + * + * The function panics if the request can not be satisfied. + */ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { @@ -542,6 +655,13 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, } #ifdef CONFIG_SPARSEMEM +/** + * alloc_bootmem_section - allocate boot memory from a specific section + * @size: size of the request in bytes + * @section_nr: sparse map section to allocate from + * + * Return NULL on failure. + */ void * __init alloc_bootmem_section(unsigned long size, unsigned long section_nr) { @@ -588,6 +708,19 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif +/** + * __alloc_bootmem_low - allocate low boot memory + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may happen on any node in the system. + * + * The function panics if the request can not be satisfied. + */ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) { @@ -609,6 +742,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, return NULL; } +/** + * __alloc_bootmem_low_node - allocate low boot memory from a specific node + * @pgdat: node to allocate from + * @size: size of the request in bytes + * @align: alignment of the region + * @goal: preferred starting address of the region + * + * The goal is dropped if it can not be satisfied and the allocation will + * fall back to memory below @goal. + * + * Allocation may fall back to any node in the system if the specified node + * can not hold the requested memory. + * + * The function panics if the request can not be satisfied. + */ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { -- cgit v1.2.3 From 2e5237daf0cc3c8d87762f53f704dc54fa91dcf6 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:02 -0700 Subject: bootmem: add debugging framework Introduce the bootmem_debug kernel parameter that enables very verbose diagnostics regarding all range operations of bootmem as well as the initialization and release of nodes. [akpm@linux-foundation.org: fix printk warnings] Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 105ad4cff2e..4e085ee1d98 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -34,6 +34,22 @@ unsigned long saved_max_pfn; bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; +static int bootmem_debug; + +static int __init bootmem_debug_setup(char *buf) +{ + bootmem_debug = 1; + return 0; +} +early_param("bootmem_debug", bootmem_debug_setup); + +#define bdebug(fmt, args...) ({ \ + if (unlikely(bootmem_debug)) \ + printk(KERN_INFO \ + "bootmem::%s " fmt, \ + __FUNCTION__, ## args); \ +}) + /* * Given an initialised bdata, it returns the size of the boot bitmap */ @@ -104,6 +120,9 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, mapsize = get_mapsize(bdata); memset(bdata->node_bootmem_map, 0xff, mapsize); + bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", + bdata - bootmem_node_data, start, mapstart, end, mapsize); + return mapsize; } @@ -198,6 +217,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) count += i; bdata->node_bootmem_map = NULL; + bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); + return count; } @@ -255,6 +276,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, + sidx + PFN_DOWN(bdata->node_boot_start), + eidx + PFN_DOWN(bdata->node_boot_start)); + for (i = sidx; i < eidx; i++) { if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) BUG(); @@ -360,13 +385,16 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - for (i = sidx; i < eidx; i++) { - if (test_and_set_bit(i, bdata->node_bootmem_map)) { -#ifdef CONFIG_DEBUG_BOOTMEM - printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); -#endif - } - } + bdebug("nid=%td start=%lx end=%lx flags=%x\n", + bdata - bootmem_node_data, + sidx + PFN_DOWN(bdata->node_boot_start), + eidx + PFN_DOWN(bdata->node_boot_start), + flags); + + for (i = sidx; i < eidx; i++) + if (test_and_set_bit(i, bdata->node_bootmem_map)) + bdebug("hm, page %lx reserved twice.\n", + PFN_DOWN(bdata->node_boot_start) + i); } /** @@ -455,6 +483,10 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, if (!bdata->node_bootmem_map) return NULL; + bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", + bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, + align, goal, limit); + /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ node_boot_start = bdata->node_boot_start; node_bootmem_map = bdata->node_bootmem_map; @@ -562,6 +594,11 @@ found: ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); } + bdebug("nid=%td start=%lx end=%lx\n", + bdata - bootmem_node_data, + start + PFN_DOWN(bdata->node_boot_start), + start + areasize + PFN_DOWN(bdata->node_boot_start)); + /* * Reserve the area now: */ -- cgit v1.2.3 From df049a5f41a3b2eee2131221959e3b558ba7c705 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:02 -0700 Subject: bootmem: revisit bitmap size calculations Reincarnate get_mapsize as bootmap_bytes and implement bootmem_bootmap_pages on top of it. Adjust users of these helpers and make free_all_bootmem_core use bootmem_bootmap_pages instead of open-coding it. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 4e085ee1d98..484849bfc8c 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -50,17 +50,11 @@ early_param("bootmem_debug", bootmem_debug_setup); __FUNCTION__, ## args); \ }) -/* - * Given an initialised bdata, it returns the size of the boot bitmap - */ -static unsigned long __init get_mapsize(bootmem_data_t *bdata) +static unsigned long __init bootmap_bytes(unsigned long pages) { - unsigned long mapsize; - unsigned long start = PFN_DOWN(bdata->node_boot_start); - unsigned long end = bdata->node_low_pfn; + unsigned long bytes = (pages + 7) / 8; - mapsize = ((end - start) + 7) / 8; - return ALIGN(mapsize, sizeof(long)); + return ALIGN(bytes, sizeof(long)); } /** @@ -69,13 +63,9 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata) */ unsigned long __init bootmem_bootmap_pages(unsigned long pages) { - unsigned long mapsize; - - mapsize = (pages+7)/8; - mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK; - mapsize >>= PAGE_SHIFT; + unsigned long bytes = bootmap_bytes(pages); - return mapsize; + return PAGE_ALIGN(bytes) >> PAGE_SHIFT; } /* @@ -117,7 +107,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, * Initially all pages are reserved - setup_arch() has to * register free RAM areas explicitly. */ - mapsize = get_mapsize(bdata); + mapsize = bootmap_bytes(end - start); memset(bdata->node_bootmem_map, 0xff, mapsize); bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", @@ -160,7 +150,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) struct page *page; unsigned long pfn; unsigned long i, count; - unsigned long idx; + unsigned long idx, pages; unsigned long *map; int gofast = 0; @@ -211,7 +201,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) * needed anymore: */ page = virt_to_page(bdata->node_bootmem_map); - idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; + pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + idx = bootmem_bootmap_pages(pages); for (i = 0; i < idx; i++, page++) __free_pages_bootmem(page, 0); count += i; -- cgit v1.2.3 From 636cc40cb79f511d9caa27ef098a83e4fa4971fb Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:03 -0700 Subject: bootmem: revisit bootmem descriptor list handling link_bootmem handles an insertion of a new descriptor into the sorted list in more or less three explicit branches; empty list, insert in between and append. These cases can be expressed implicite. Also mark the sorted list as initdata as it can be thrown away after boot as well. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 484849bfc8c..9da7d409781 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -23,7 +23,6 @@ unsigned long max_low_pfn; unsigned long min_low_pfn; unsigned long max_pfn; -static LIST_HEAD(bdata_list); #ifdef CONFIG_CRASH_DUMP /* * If we have booted due to a crash, max_pfn will be a very low value. We need @@ -34,6 +33,8 @@ unsigned long saved_max_pfn; bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; +static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); + static int bootmem_debug; static int __init bootmem_debug_setup(char *buf) @@ -73,20 +74,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages) */ static void __init link_bootmem(bootmem_data_t *bdata) { - bootmem_data_t *ent; + struct list_head *iter; - if (list_empty(&bdata_list)) { - list_add(&bdata->list, &bdata_list); - return; - } - /* insert in order */ - list_for_each_entry(ent, &bdata_list, list) { - if (bdata->node_boot_start < ent->node_boot_start) { - list_add_tail(&bdata->list, &ent->list); - return; - } + list_for_each(iter, &bdata_list) { + bootmem_data_t *ent; + + ent = list_entry(iter, bootmem_data_t, list); + if (bdata->node_boot_start < ent->node_boot_start) + break; } - list_add_tail(&bdata->list, &bdata_list); + list_add_tail(&bdata->list, iter); } /* -- cgit v1.2.3 From 41546c17418fba08ece978bad72a33072715b8f3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:03 -0700 Subject: bootmem: clean up free_all_bootmem_core Rewrite the code in a more concise way using less variables. [akpm@linux-foundation.org: fix printk warnings] Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 83 ++++++++++++++++++++++++++++-------------------------------- 1 file changed, 38 insertions(+), 45 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 9da7d409781..300d126ec53 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -144,66 +144,59 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { + int aligned; struct page *page; - unsigned long pfn; - unsigned long i, count; - unsigned long idx, pages; - unsigned long *map; - int gofast = 0; - - BUG_ON(!bdata->node_bootmem_map); - - count = 0; - /* first extant page of the node */ - pfn = PFN_DOWN(bdata->node_boot_start); - idx = bdata->node_low_pfn - pfn; - map = bdata->node_bootmem_map; + unsigned long start, end, pages, count = 0; + + if (!bdata->node_bootmem_map) + return 0; + + start = PFN_DOWN(bdata->node_boot_start); + end = bdata->node_low_pfn; + /* - * Check if we are aligned to BITS_PER_LONG pages. If so, we might - * be able to free page orders of that size at once. + * If the start is aligned to the machines wordsize, we might + * be able to free pages in bulks of that order. */ - if (!(pfn & (BITS_PER_LONG-1))) - gofast = 1; + aligned = !(start & (BITS_PER_LONG - 1)); + + bdebug("nid=%td start=%lx end=%lx aligned=%d\n", + bdata - bootmem_node_data, start, end, aligned); + + while (start < end) { + unsigned long *map, idx, vec; - for (i = 0; i < idx; ) { - unsigned long v = ~map[i / BITS_PER_LONG]; + map = bdata->node_bootmem_map; + idx = start - PFN_DOWN(bdata->node_boot_start); + vec = ~map[idx / BITS_PER_LONG]; - if (gofast && v == ~0UL) { - int order; + if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { + int order = ilog2(BITS_PER_LONG); - page = pfn_to_page(pfn); + __free_pages_bootmem(pfn_to_page(start), order); count += BITS_PER_LONG; - order = ffs(BITS_PER_LONG) - 1; - __free_pages_bootmem(page, order); - i += BITS_PER_LONG; - page += BITS_PER_LONG; - } else if (v) { - unsigned long m; - - page = pfn_to_page(pfn); - for (m = 1; m && i < idx; m<<=1, page++, i++) { - if (v & m) { - count++; + } else { + unsigned long off = 0; + + while (vec && off < BITS_PER_LONG) { + if (vec & 1) { + page = pfn_to_page(start + off); __free_pages_bootmem(page, 0); + count++; } + vec >>= 1; + off++; } - } else { - i += BITS_PER_LONG; } - pfn += BITS_PER_LONG; + start += BITS_PER_LONG; } - /* - * Now free the allocator bitmap itself, it's not - * needed anymore: - */ page = virt_to_page(bdata->node_bootmem_map); pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - idx = bootmem_bootmap_pages(pages); - for (i = 0; i < idx; i++, page++) - __free_pages_bootmem(page, 0); - count += i; - bdata->node_bootmem_map = NULL; + pages = bootmem_bootmap_pages(pages); + count += pages; + while (pages--) + __free_pages_bootmem(page++, 0); bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); -- cgit v1.2.3 From 5f2809e69c7128f86316048221cf45146f69a4a0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:05 -0700 Subject: bootmem: clean up alloc_bootmem_core alloc_bootmem_core has become quite nasty to read over time. This is a clean rewrite that keeps the semantics. bdata->last_pos has been dropped. bdata->last_success has been renamed to hint_idx and it is now an index relative to the node's range. Since further block searching might start at this index, it is now set to the end of a succeeded allocation rather than its beginning. bdata->last_offset has been renamed to last_end_off to be more clear that it represents the ending address of the last allocation relative to the node. [y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()] Signed-off-by: Johannes Weiner Signed-off-by: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 212 +++++++++++++++++++++-------------------------------------- 1 file changed, 76 insertions(+), 136 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 300d126ec53..94ea612decc 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, * considered reserved. */ - if (addr >= bdata->node_boot_start && addr < bdata->last_success) - bdata->last_success = addr; + if (addr >= bdata->node_boot_start && + PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx) + bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start); /* * Round up to index to the range. @@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -/* - * We 'merge' subsequent allocations to save space. We might 'lose' - * some fraction of a page if allocations cannot be satisfied due to - * size constraints on boxes where there is physical RAM space - * fragmentation - in these cases (mostly large memory boxes) this - * is not a problem. - * - * On low memory boxes we get it right in 100% of the cases. - * - * alignment has to be a power of 2 value. - * - * NOTE: This function is _not_ reentrant. - */ -static void * __init -alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal, unsigned long limit) +static void * __init alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { - unsigned long areasize, preferred; - unsigned long i, start = 0, incr, eidx, end_pfn; - void *ret; - unsigned long node_boot_start; - void *node_bootmem_map; - - if (!size) { - printk("alloc_bootmem_core(): zero-sized request\n"); - BUG(); - } - BUG_ON(align & (align-1)); + unsigned long min, max, start, sidx, midx, step; + + BUG_ON(!size); + BUG_ON(align & (align - 1)); + BUG_ON(limit && goal + size > limit); - /* on nodes without memory - bootmem_map is NULL */ if (!bdata->node_bootmem_map) return NULL; @@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, align, goal, limit); - /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ - node_boot_start = bdata->node_boot_start; - node_bootmem_map = bdata->node_bootmem_map; - if (align) { - node_boot_start = ALIGN(bdata->node_boot_start, align); - if (node_boot_start > bdata->node_boot_start) - node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + - PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; - } + min = PFN_DOWN(bdata->node_boot_start); + max = bdata->node_low_pfn; - if (limit && node_boot_start >= limit) + goal >>= PAGE_SHIFT; + limit >>= PAGE_SHIFT; + + if (limit && max > limit) + max = limit; + if (max <= min) return NULL; - end_pfn = bdata->node_low_pfn; - limit = PFN_DOWN(limit); - if (limit && end_pfn > limit) - end_pfn = limit; + step = max(align >> PAGE_SHIFT, 1UL); - eidx = end_pfn - PFN_DOWN(node_boot_start); + if (goal && min < goal && goal < max) + start = ALIGN(goal, step); + else + start = ALIGN(min, step); - /* - * We try to allocate bootmem pages above 'goal' - * first, then we try to allocate lower pages. - */ - preferred = 0; - if (goal && PFN_DOWN(goal) < end_pfn) { - if (goal > node_boot_start) - preferred = goal - node_boot_start; - - if (bdata->last_success > node_boot_start && - bdata->last_success - node_boot_start >= preferred) - if (!limit || (limit && limit > bdata->last_success)) - preferred = bdata->last_success - node_boot_start; - } + sidx = start - PFN_DOWN(bdata->node_boot_start); + midx = max - PFN_DOWN(bdata->node_boot_start); - preferred = PFN_DOWN(ALIGN(preferred, align)); - areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; - incr = align >> PAGE_SHIFT ? : 1; + if (bdata->hint_idx > sidx) { + /* Make sure we retry on failure */ + goal = 1; + sidx = ALIGN(bdata->hint_idx, step); + } -restart_scan: - for (i = preferred; i < eidx;) { - unsigned long j; + while (1) { + int merge; + void *region; + unsigned long eidx, i, start_off, end_off; +find_block: + sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); + sidx = ALIGN(sidx, step); + eidx = sidx + PFN_UP(size); - i = find_next_zero_bit(node_bootmem_map, eidx, i); - i = ALIGN(i, incr); - if (i >= eidx) + if (sidx >= midx || eidx > midx) break; - if (test_bit(i, node_bootmem_map)) { - i += incr; - continue; - } - for (j = i + 1; j < i + areasize; ++j) { - if (j >= eidx) - goto fail_block; - if (test_bit(j, node_bootmem_map)) - goto fail_block; - } - start = i; - goto found; - fail_block: - i = ALIGN(j, incr); - if (i == j) - i += incr; - } - - if (preferred > 0) { - preferred = 0; - goto restart_scan; - } - return NULL; -found: - bdata->last_success = PFN_PHYS(start) + node_boot_start; - BUG_ON(start >= eidx); + for (i = sidx; i < eidx; i++) + if (test_bit(i, bdata->node_bootmem_map)) { + sidx = ALIGN(i, step); + if (sidx == i) + sidx += step; + goto find_block; + } - /* - * Is the next page of the previous allocation-end the start - * of this allocation's buffer? If yes then we can 'merge' - * the previous partial page with this allocation. - */ - if (align < PAGE_SIZE && - bdata->last_offset && bdata->last_pos+1 == start) { - unsigned long offset, remaining_size; - offset = ALIGN(bdata->last_offset, align); - BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE - offset; - if (size < remaining_size) { - areasize = 0; - /* last_pos unchanged */ - bdata->last_offset = offset + size; - ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + node_boot_start); - } else { - remaining_size = size - remaining_size; - areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + node_boot_start); - bdata->last_pos = start + areasize - 1; - bdata->last_offset = remaining_size; - } - bdata->last_offset &= ~PAGE_MASK; - } else { - bdata->last_pos = start + areasize - 1; - bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); + if (bdata->last_end_off && + PFN_DOWN(bdata->last_end_off) + 1 == sidx) + start_off = ALIGN(bdata->last_end_off, align); + else + start_off = PFN_PHYS(sidx); + + merge = PFN_DOWN(start_off) < sidx; + end_off = start_off + size; + + bdata->last_end_off = end_off; + bdata->hint_idx = PFN_UP(end_off); + + /* + * Reserve the area now: + */ + for (i = PFN_DOWN(start_off) + merge; + i < PFN_UP(end_off); i++) + if (test_and_set_bit(i, bdata->node_bootmem_map)) + BUG(); + + region = phys_to_virt(bdata->node_boot_start + start_off); + memset(region, 0, size); + return region; } - bdebug("nid=%td start=%lx end=%lx\n", - bdata - bootmem_node_data, - start + PFN_DOWN(bdata->node_boot_start), - start + areasize + PFN_DOWN(bdata->node_boot_start)); + if (goal) { + goal = 0; + sidx = 0; + goto find_block; + } - /* - * Reserve the area now: - */ - for (i = start; i < start + areasize; i++) - if (unlikely(test_and_set_bit(i, node_bootmem_map))) - BUG(); - memset(ret, 0, size); - return ret; + return NULL; } /** -- cgit v1.2.3 From d747fa4bcebcf3696607b86a6b0dafa644be0676 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:05 -0700 Subject: bootmem: free/reserve helpers Factor out the common operation of marking a range on the bitmap. [akpm@linux-foundation.org: fix various warnings] Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 65 ++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 22 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 94ea612decc..9d03ff65135 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -225,6 +225,44 @@ unsigned long __init free_all_bootmem(void) return free_all_bootmem_core(NODE_DATA(0)->bdata); } +static void __init __free(bootmem_data_t *bdata, + unsigned long sidx, unsigned long eidx) +{ + unsigned long idx; + + bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, + sidx + PFN_DOWN(bdata->node_boot_start), + eidx + PFN_DOWN(bdata->node_boot_start)); + + for (idx = sidx; idx < eidx; idx++) + if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) + BUG(); +} + +static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, + unsigned long eidx, int flags) +{ + unsigned long idx; + int exclusive = flags & BOOTMEM_EXCLUSIVE; + + bdebug("nid=%td start=%lx end=%lx flags=%x\n", + bdata - bootmem_node_data, + sidx + PFN_DOWN(bdata->node_boot_start), + eidx + PFN_DOWN(bdata->node_boot_start), + flags); + + for (idx = sidx; idx < eidx; idx++) + if (test_and_set_bit(idx, bdata->node_bootmem_map)) { + if (exclusive) { + __free(bdata, sidx, idx); + return -EBUSY; + } + bdebug("silent double reserve of PFN %lx\n", + idx + PFN_DOWN(bdata->node_boot_start)); + } + return 0; +} + static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) { @@ -258,14 +296,7 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx + PFN_DOWN(bdata->node_boot_start)); - - for (i = sidx; i < eidx; i++) { - if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) - BUG(); - } + __free(bdata, sidx, eidx); } /** @@ -367,16 +398,7 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - bdebug("nid=%td start=%lx end=%lx flags=%x\n", - bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx + PFN_DOWN(bdata->node_boot_start), - flags); - - for (i = sidx; i < eidx; i++) - if (test_and_set_bit(i, bdata->node_bootmem_map)) - bdebug("hm, page %lx reserved twice.\n", - PFN_DOWN(bdata->node_boot_start) + i); + return __reserve(bdata, sidx, eidx, flags); } /** @@ -511,10 +533,9 @@ find_block: /* * Reserve the area now: */ - for (i = PFN_DOWN(start_off) + merge; - i < PFN_UP(end_off); i++) - if (test_and_set_bit(i, bdata->node_bootmem_map)) - BUG(); + if (__reserve(bdata, PFN_DOWN(start_off) + merge, + PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) + BUG(); region = phys_to_virt(bdata->node_boot_start + start_off); memset(region, 0, size); -- cgit v1.2.3 From e2bf3cae515090fefe28329e71230dfe7ab873b1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:06 -0700 Subject: bootmem: factor out the marking of a PFN range Introduce new helpers that mark a range that resides completely on a node or node-agnostic ranges that might also span node boundaries. The free/reserve API functions will then directly use these helpers. Note that the free/reserve semantics become more strict: while the prior code took basically arbitrary range arguments and marked the PFNs that happen to fall into that range, the new code requires node-specific ranges to be completely on the node. The node-agnostic requests might span node boundaries as long as the nodes are contiguous. Passing ranges that do not satisfy these criteria is a bug. [akpm@linux-foundation.org: fix printk warnings] Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 188 ++++++++++++++++++++++------------------------------------- 1 file changed, 69 insertions(+), 119 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 9d03ff65135..e5415a5414a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -234,6 +234,9 @@ static void __init __free(bootmem_data_t *bdata, sidx + PFN_DOWN(bdata->node_boot_start), eidx + PFN_DOWN(bdata->node_boot_start)); + if (bdata->hint_idx > sidx) + bdata->hint_idx = sidx; + for (idx = sidx; idx < eidx; idx++) if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) BUG(); @@ -263,40 +266,57 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, return 0; } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, - unsigned long size) +static int __init mark_bootmem_node(bootmem_data_t *bdata, + unsigned long start, unsigned long end, + int reserve, int flags) { unsigned long sidx, eidx; - unsigned long i; - BUG_ON(!size); + bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", + bdata - bootmem_node_data, start, end, reserve, flags); - /* out range */ - if (addr + size < bdata->node_boot_start || - PFN_DOWN(addr) > bdata->node_low_pfn) - return; - /* - * round down end of usable mem, partially free pages are - * considered reserved. - */ + BUG_ON(start < PFN_DOWN(bdata->node_boot_start)); + BUG_ON(end > bdata->node_low_pfn); - if (addr >= bdata->node_boot_start && - PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx) - bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start); + sidx = start - PFN_DOWN(bdata->node_boot_start); + eidx = end - PFN_DOWN(bdata->node_boot_start); - /* - * Round up to index to the range. - */ - if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start)) - sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); + if (reserve) + return __reserve(bdata, sidx, eidx, flags); else - sidx = 0; + __free(bdata, sidx, eidx); + return 0; +} + +static int __init mark_bootmem(unsigned long start, unsigned long end, + int reserve, int flags) +{ + unsigned long pos; + bootmem_data_t *bdata; + + pos = start; + list_for_each_entry(bdata, &bdata_list, list) { + int err; + unsigned long max; + + if (pos < PFN_DOWN(bdata->node_boot_start)) { + BUG_ON(pos != start); + continue; + } + + max = min(bdata->node_low_pfn, end); - eidx = PFN_DOWN(addr + size - bdata->node_boot_start); - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + err = mark_bootmem_node(bdata, pos, max, reserve, flags); + if (reserve && err) { + mark_bootmem(start, pos, 0, 0); + return err; + } - __free(bdata, sidx, eidx); + if (max == end) + return 0; + pos = bdata->node_low_pfn; + } + BUG(); } /** @@ -307,12 +327,17 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, * * Partial pages will be considered reserved and left as they are. * - * Only physical pages that actually reside on @pgdat are marked. + * The range must reside completely on the specified node. */ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size) { - free_bootmem_core(pgdat->bdata, physaddr, size); + unsigned long start, end; + + start = PFN_UP(physaddr); + end = PFN_DOWN(physaddr + size); + + mark_bootmem_node(pgdat->bdata, start, end, 0, 0); } /** @@ -322,83 +347,16 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, * * Partial pages will be considered reserved and left as they are. * - * All physical pages within the range are marked, no matter what - * node they reside on. + * The range must be contiguous but may span node boundaries. */ void __init free_bootmem(unsigned long addr, unsigned long size) { - bootmem_data_t *bdata; - list_for_each_entry(bdata, &bdata_list, list) - free_bootmem_core(bdata, addr, size); -} - -/* - * Marks a particular physical memory range as unallocatable. Usable RAM - * might be used for boot-time allocations - or it might get added - * to the free page pool later on. - */ -static int __init can_reserve_bootmem_core(bootmem_data_t *bdata, - unsigned long addr, unsigned long size, int flags) -{ - unsigned long sidx, eidx; - unsigned long i; - - BUG_ON(!size); - - /* out of range, don't hold other */ - if (addr + size < bdata->node_boot_start || - PFN_DOWN(addr) > bdata->node_low_pfn) - return 0; - - /* - * Round up to index to the range. - */ - if (addr > bdata->node_boot_start) - sidx= PFN_DOWN(addr - bdata->node_boot_start); - else - sidx = 0; - - eidx = PFN_UP(addr + size - bdata->node_boot_start); - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - - for (i = sidx; i < eidx; i++) { - if (test_bit(i, bdata->node_bootmem_map)) { - if (flags & BOOTMEM_EXCLUSIVE) - return -EBUSY; - } - } - - return 0; - -} - -static void __init reserve_bootmem_core(bootmem_data_t *bdata, - unsigned long addr, unsigned long size, int flags) -{ - unsigned long sidx, eidx; - unsigned long i; - - BUG_ON(!size); - - /* out of range */ - if (addr + size < bdata->node_boot_start || - PFN_DOWN(addr) > bdata->node_low_pfn) - return; - - /* - * Round up to index to the range. - */ - if (addr > bdata->node_boot_start) - sidx= PFN_DOWN(addr - bdata->node_boot_start); - else - sidx = 0; + unsigned long start, end; - eidx = PFN_UP(addr + size - bdata->node_boot_start); - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + start = PFN_UP(addr); + end = PFN_DOWN(addr + size); - return __reserve(bdata, sidx, eidx, flags); + mark_bootmem(start, end, 0, 0); } /** @@ -410,18 +368,17 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, * * Partial pages will be reserved. * - * Only physical pages that actually reside on @pgdat are marked. + * The range must reside completely on the specified node. */ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags) { - int ret; + unsigned long start, end; - ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); - if (ret < 0) - return -ENOMEM; - reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); - return 0; + start = PFN_DOWN(physaddr); + end = PFN_UP(physaddr + size); + + return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE @@ -433,24 +390,17 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, * * Partial pages will be reserved. * - * All physical pages within the range are marked, no matter what - * node they reside on. + * The range must be contiguous but may span node boundaries. */ int __init reserve_bootmem(unsigned long addr, unsigned long size, int flags) { - bootmem_data_t *bdata; - int ret; + unsigned long start, end; - list_for_each_entry(bdata, &bdata_list, list) { - ret = can_reserve_bootmem_core(bdata, addr, size, flags); - if (ret < 0) - return ret; - } - list_for_each_entry(bdata, &bdata_list, list) - reserve_bootmem_core(bdata, addr, size, flags); + start = PFN_DOWN(addr); + end = PFN_UP(addr + size); - return 0; + return mark_bootmem(start, end, 1, flags); } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ @@ -663,7 +613,7 @@ void * __init alloc_bootmem_section(unsigned long size, if (start_nr != section_nr || end_nr != section_nr) { printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n", section_nr); - free_bootmem_core(pgdat->bdata, __pa(ptr), size); + free_bootmem_node(pgdat, __pa(ptr), size); ptr = NULL; } -- cgit v1.2.3 From 0f3caba211babef6e3fbde1ba76ddc79321bc92f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:07 -0700 Subject: bootmem: respect goal more likely The old node-agnostic code tried allocating on all nodes starting from the one with the lowest range. alloc_bootmem_core retried without the goal if it could not satisfy it and so the goal was only respected at all when it happened to be on the first (lowest page numbers) node (or theoretically if allocations failed on all nodes before to the one holding the goal). Introduce a non-panicking helper that starts allocating from the node holding the goal and falls back only after all thes tries failed, thus moving the goal fallback code out of alloc_bootmem_core. Make all other allocation functions benefit from this new helper. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Andi Kleen Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 92 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 38 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index e5415a5414a..89646f77b42 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -408,6 +408,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { + unsigned long fallback = 0; unsigned long min, max, start, sidx, midx, step; BUG_ON(!size); @@ -443,8 +444,11 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, midx = max - PFN_DOWN(bdata->node_boot_start); if (bdata->hint_idx > sidx) { - /* Make sure we retry on failure */ - goal = 1; + /* + * Handle the valid case of sidx being zero and still + * catch the fallback below. + */ + fallback = sidx + 1; sidx = ALIGN(bdata->hint_idx, step); } @@ -492,10 +496,39 @@ find_block: return region; } + if (fallback) { + sidx = ALIGN(fallback - 1, step); + fallback = 0; + goto find_block; + } + + return NULL; +} + +static void * __init ___alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit) +{ + bootmem_data_t *bdata; + +restart: + list_for_each_entry(bdata, &bdata_list, list) { + void *region; + + if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) + continue; + if (limit && bdata->node_boot_start >= limit) + break; + + region = alloc_bootmem_core(bdata, size, align, goal, limit); + if (region) + return region; + } + if (goal) { goal = 0; - sidx = 0; - goto find_block; + goto restart; } return NULL; @@ -515,16 +548,23 @@ find_block: * Returns NULL on failure. */ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, - unsigned long goal) + unsigned long goal) { - bootmem_data_t *bdata; - void *ptr; + return ___alloc_bootmem_nopanic(size, align, goal, 0); +} - list_for_each_entry(bdata, &bdata_list, list) { - ptr = alloc_bootmem_core(bdata, size, align, goal, 0); - if (ptr) - return ptr; - } +static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) +{ + void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit); + + if (mem) + return mem; + /* + * Whoops, we cannot satisfy the allocation request. + */ + printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); + panic("Out of memory"); return NULL; } @@ -544,16 +584,7 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { - void *mem = __alloc_bootmem_nopanic(size,align,goal); - - if (mem) - return mem; - /* - * Whoops, we cannot satisfy the allocation request. - */ - printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); - panic("Out of memory"); - return NULL; + return ___alloc_bootmem(size, align, goal, 0); } /** @@ -653,22 +684,7 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) { - bootmem_data_t *bdata; - void *ptr; - - list_for_each_entry(bdata, &bdata_list, list) { - ptr = alloc_bootmem_core(bdata, size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); - if (ptr) - return ptr; - } - - /* - * Whoops, we cannot satisfy the allocation request. - */ - printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size); - panic("Out of low memory"); - return NULL; + return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } /** -- cgit v1.2.3 From 4cc278b721d5bf3569dfc5f1100253042e097bc3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:08 -0700 Subject: bootmem: Make __alloc_bootmem_low_node fall back to other nodes __alloc_bootmem_node already does this, make the interface consistent. Signed-off-by: Johannes Weiner Cc: Ingo Molnar Cc: Yinghai Lu Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 89646f77b42..459da4710b8 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -587,6 +587,19 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, 0); } +static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) +{ + void *ptr; + + ptr = alloc_bootmem_core(bdata, size, align, goal, limit); + if (ptr) + return ptr; + + return ___alloc_bootmem(size, align, goal, limit); +} + /** * __alloc_bootmem_node - allocate boot memory from a specific node * @pgdat: node to allocate from @@ -605,13 +618,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - void *ptr; - - ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - - return __alloc_bootmem(size, align, goal); + return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); } #ifdef CONFIG_SPARSEMEM @@ -705,6 +712,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return alloc_bootmem_core(pgdat->bdata, size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); + return ___alloc_bootmem_node(pgdat->bdata, size, align, + goal, ARCH_LOW_ADDRESS_LIMIT); } -- cgit v1.2.3 From 75a56cfe9fdb064d1db1cfbc564315fddb756fb1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:09 -0700 Subject: bootmem: revisit alloc_bootmem_section Since alloc_bootmem_core does no goal-fallback anymore and just returns NULL if the allocation fails, we might now use it in alloc_bootmem_section without all the fixup code for a misplaced allocation. Also, the limit can be the first PFN of the next section as the semantics is that the limit is _above_ the allocated region, not within. Signed-off-by: Johannes Weiner Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 459da4710b8..282b786c2b1 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -632,30 +632,15 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, void * __init alloc_bootmem_section(unsigned long size, unsigned long section_nr) { - void *ptr; - unsigned long limit, goal, start_nr, end_nr, pfn; - struct pglist_data *pgdat; + bootmem_data_t *bdata; + unsigned long pfn, goal, limit; pfn = section_nr_to_pfn(section_nr); - goal = PFN_PHYS(pfn); - limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1; - pgdat = NODE_DATA(early_pfn_to_nid(pfn)); - ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal, - limit); - - if (!ptr) - return NULL; - - start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr))); - end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size)); - if (start_nr != section_nr || end_nr != section_nr) { - printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n", - section_nr); - free_bootmem_node(pgdat, __pa(ptr), size); - ptr = NULL; - } + goal = pfn << PAGE_SHIFT; + limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; + bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; - return ptr; + return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); } #endif -- cgit v1.2.3 From 3560e249abda6bee41a07a7bf0383a6e193e2839 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:09 -0700 Subject: bootmem: replace node_boot_start in struct bootmem_data Almost all users of this field need a PFN instead of a physical address, so replace node_boot_start with node_min_pfn. [Lee.Schermerhorn@hp.com: fix spurious BUG_ON() in mark_bootmem()] Signed-off-by: Johannes Weiner Cc: Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'mm/bootmem.c') diff --git a/mm/bootmem.c b/mm/bootmem.c index 282b786c2b1..4af15d0340a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -80,7 +80,7 @@ static void __init link_bootmem(bootmem_data_t *bdata) bootmem_data_t *ent; ent = list_entry(iter, bootmem_data_t, list); - if (bdata->node_boot_start < ent->node_boot_start) + if (bdata->node_min_pfn < ent->node_min_pfn) break; } list_add_tail(&bdata->list, iter); @@ -96,7 +96,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, mminit_validate_memmodel_limits(&start, &end); bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); - bdata->node_boot_start = PFN_PHYS(start); + bdata->node_min_pfn = start; bdata->node_low_pfn = end; link_bootmem(bdata); @@ -151,7 +151,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) if (!bdata->node_bootmem_map) return 0; - start = PFN_DOWN(bdata->node_boot_start); + start = bdata->node_min_pfn; end = bdata->node_low_pfn; /* @@ -167,7 +167,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) unsigned long *map, idx, vec; map = bdata->node_bootmem_map; - idx = start - PFN_DOWN(bdata->node_boot_start); + idx = start - bdata->node_min_pfn; vec = ~map[idx / BITS_PER_LONG]; if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { @@ -192,7 +192,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) } page = virt_to_page(bdata->node_bootmem_map); - pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + pages = bdata->node_low_pfn - bdata->node_min_pfn; pages = bootmem_bootmap_pages(pages); count += pages; while (pages--) @@ -231,8 +231,8 @@ static void __init __free(bootmem_data_t *bdata, unsigned long idx; bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx + PFN_DOWN(bdata->node_boot_start)); + sidx + bdata->node_min_pfn, + eidx + bdata->node_min_pfn); if (bdata->hint_idx > sidx) bdata->hint_idx = sidx; @@ -250,8 +250,8 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, bdebug("nid=%td start=%lx end=%lx flags=%x\n", bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx + PFN_DOWN(bdata->node_boot_start), + sidx + bdata->node_min_pfn, + eidx + bdata->node_min_pfn, flags); for (idx = sidx; idx < eidx; idx++) @@ -261,7 +261,7 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, return -EBUSY; } bdebug("silent double reserve of PFN %lx\n", - idx + PFN_DOWN(bdata->node_boot_start)); + idx + bdata->node_min_pfn); } return 0; } @@ -275,11 +275,11 @@ static int __init mark_bootmem_node(bootmem_data_t *bdata, bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", bdata - bootmem_node_data, start, end, reserve, flags); - BUG_ON(start < PFN_DOWN(bdata->node_boot_start)); + BUG_ON(start < bdata->node_min_pfn); BUG_ON(end > bdata->node_low_pfn); - sidx = start - PFN_DOWN(bdata->node_boot_start); - eidx = end - PFN_DOWN(bdata->node_boot_start); + sidx = start - bdata->node_min_pfn; + eidx = end - bdata->node_min_pfn; if (reserve) return __reserve(bdata, sidx, eidx, flags); @@ -299,7 +299,8 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, int err; unsigned long max; - if (pos < PFN_DOWN(bdata->node_boot_start)) { + if (pos < bdata->node_min_pfn || + pos >= bdata->node_low_pfn) { BUG_ON(pos != start); continue; } @@ -422,7 +423,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, align, goal, limit); - min = PFN_DOWN(bdata->node_boot_start); + min = bdata->node_min_pfn; max = bdata->node_low_pfn; goal >>= PAGE_SHIFT; @@ -440,8 +441,8 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, else start = ALIGN(min, step); - sidx = start - PFN_DOWN(bdata->node_boot_start); - midx = max - PFN_DOWN(bdata->node_boot_start); + sidx = start - bdata->node_min_pfn;; + midx = max - bdata->node_min_pfn; if (bdata->hint_idx > sidx) { /* @@ -491,7 +492,8 @@ find_block: PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) BUG(); - region = phys_to_virt(bdata->node_boot_start + start_off); + region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + + start_off); memset(region, 0, size); return region; } @@ -518,7 +520,7 @@ restart: if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) continue; - if (limit && bdata->node_boot_start >= limit) + if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) break; region = alloc_bootmem_core(bdata, size, align, goal, limit); -- cgit v1.2.3