From e123dd3f0ec1664576456ea1ea045591a0a95f0c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Apr 2008 11:51:06 -0700 Subject: mm: make mem_map allocation continuous vmemmap allocation currently has this layout: [ffffe20000000000-ffffe200001fffff] PMD ->ffff810001400000 on node 0 [ffffe20000200000-ffffe200003fffff] PMD ->ffff810001800000 on node 0 [ffffe20000400000-ffffe200005fffff] PMD ->ffff810001c00000 on node 0 [ffffe20000600000-ffffe200007fffff] PMD ->ffff810002000000 on node 0 [ffffe20000800000-ffffe200009fffff] PMD ->ffff810002400000 on node 0 ... note that there is a 2M hole between them - not optimal. the root cause is that usemap (24 bytes) will be allocated after every 2M mem_map, and it will push next vmemmap (2M) to the next (2M) alignment. solution: try to allocate the mem_map continously. after the patch, we get: [ffffe20000000000-ffffe200001fffff] PMD ->ffff810001400000 on node 0 [ffffe20000200000-ffffe200003fffff] PMD ->ffff810001600000 on node 0 [ffffe20000400000-ffffe200005fffff] PMD ->ffff810001800000 on node 0 [ffffe20000600000-ffffe200007fffff] PMD ->ffff810001a00000 on node 0 [ffffe20000800000-ffffe200009fffff] PMD ->ffff810001c00000 on node 0 ... which is the ideal layout. and usemap will share a page because of they are allocated continuously too: sparse_early_usemap_alloc: usemap = ffff810024e00000 size = 24 sparse_early_usemap_alloc: usemap = ffff810024e00080 size = 24 sparse_early_usemap_alloc: usemap = ffff810024e00100 size = 24 sparse_early_usemap_alloc: usemap = ffff810024e00180 size = 24 ... so we make the bootmem allocation more compact and use less memory for usemap => mission accomplished ;-) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- mm/sparse.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index 98d6b39c347..458109b99e6 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -304,22 +304,48 @@ void __init sparse_init(void) unsigned long pnum; struct page *map; unsigned long *usemap; + unsigned long **usemap_map; + int size; + + /* + * map is using big page (aka 2M in x86 64 bit) + * usemap is less one page (aka 24 bytes) + * so alloc 2M (with 2M align) and 24 bytes in turn will + * make next 2M slip to one more 2M later. + * then in big system, the memory will have a lot of holes... + * here try to allocate 2M pages continously. + * + * powerpc need to call sparse_init_one_section right after each + * sparse_early_mem_map_alloc, so allocate usemap_map at first. 
+ */ + size = sizeof(unsigned long *) * NR_MEM_SECTIONS; + usemap_map = alloc_bootmem(size); + if (!usemap_map) + panic("can not allocate usemap_map\n"); for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) continue; + usemap_map[pnum] = sparse_early_usemap_alloc(pnum); + } - map = sparse_early_mem_map_alloc(pnum); - if (!map) + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { + if (!present_section_nr(pnum)) continue; - usemap = sparse_early_usemap_alloc(pnum); + usemap = usemap_map[pnum]; if (!usemap) continue; + map = sparse_early_mem_map_alloc(pnum); + if (!map) + continue; + sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap); } + + free_bootmem(__pa(usemap_map), size); } #ifdef CONFIG_MEMORY_HOTPLUG -- cgit v1.2.3 From ad09315cad17458e51c7f1f8b371cb942c54b955 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 10 Mar 2008 23:23:42 -0700 Subject: mm: fix alloc_bootmem_core to use fast searching for all nodes Make the nodes other than node 0 use bdata->last_success for fast search too. We need to use __alloc_bootmem_core() for vmemmap allocation for other nodes when numa and sparsemem/vmemmap are enabled. Also, make fail_block path increase i with incr only after ALIGN to avoid extra increase when size is larger than align. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- mm/bootmem.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 2ccea700968..3c012fb5874 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -238,28 +238,32 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. */ - if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { - preferred = goal - bdata->node_boot_start; + preferred = 0; + if (goal && PFN_DOWN(goal) < end_pfn) { + if (goal > bdata->node_boot_start) + preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) if (!limit || (limit && limit > bdata->last_success)) preferred = bdata->last_success; - } else - preferred = 0; + } preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; incr = align >> PAGE_SHIFT ? : 1; restart_scan: - for (i = preferred; i < eidx; i += incr) { + for (i = preferred; i < eidx;) { unsigned long j; + i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); i = ALIGN(i, incr); if (i >= eidx) break; - if (test_bit(i, bdata->node_bootmem_map)) + if (test_bit(i, bdata->node_bootmem_map)) { + i += incr; continue; + } for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; @@ -270,6 +274,8 @@ restart_scan: goto found; fail_block: i = ALIGN(j, incr); + if (i == j) + i += incr; } if (preferred > offset) { -- cgit v1.2.3 From 9a2dc04cf070ee98e014a172695782ff42015fc4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:44:48 -0700 Subject: mm: offset align in alloc_bootmem() need offset alignment when node_boot_start's alignment is less than the alignment required. use local node_boot_start to match alignment - so don't add extra operation in search loop. 
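For illustration only, here is a minimal user-space sketch of the idea (hypothetical names, not the patch code): align the node start up to the requested alignment once, and advance the bitmap base by the corresponding number of whole words, so the scan loop can use plain bit indices with no per-iteration offset arithmetic.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

struct node_ctx {
	uintptr_t	 boot_start;	/* physical start of the node */
	unsigned long	*bitmap;	/* one bit per page, starting at boot_start */
};

/*
 * Align the node start once and advance the bitmap base by whole words.
 * Like the patch, this assumes the skipped page count is a multiple of
 * BITS_PER_LONG, i.e. the node start is at least 2^(12+6)-byte aligned.
 */
static void align_scan_base(struct node_ctx *n, uintptr_t align)
{
	uintptr_t aligned = ALIGN_UP(n->boot_start, align);

	if (aligned > n->boot_start) {
		uintptr_t skipped_pages = (aligned - n->boot_start) >> PAGE_SHIFT;

		n->bitmap     += skipped_pages / BITS_PER_LONG;
		n->boot_start  = aligned;
	}
}

int main(void)
{
	static unsigned long map[1024];
	struct node_ctx n = { .boot_start = 0x40040000, .bitmap = map };

	align_scan_base(&n, 1UL << 21);			/* ask for 2M alignment */
	printf("start %#lx, bitmap advanced by %td words\n",
	       (unsigned long)n.boot_start, n.bitmap - map);
	return 0;
}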
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- mm/bootmem.c | 60 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 3c012fb5874..0f30bc873ec 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -206,9 +206,11 @@ void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { - unsigned long offset, remaining_size, areasize, preferred; + unsigned long areasize, preferred; unsigned long i, start = 0, incr, eidx, end_pfn; void *ret; + unsigned long node_boot_start; + void *node_bootmem_map; if (!size) { printk("__alloc_bootmem_core(): zero-sized request\n"); @@ -216,23 +218,29 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } BUG_ON(align & (align-1)); - if (limit && bdata->node_boot_start >= limit) - return NULL; - /* on nodes without memory - bootmem_map is NULL */ if (!bdata->node_bootmem_map) return NULL; + /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ + node_boot_start = bdata->node_boot_start; + node_bootmem_map = bdata->node_bootmem_map; + if (align) { + node_boot_start = ALIGN(bdata->node_boot_start, align); + if (node_boot_start > bdata->node_boot_start) + node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + + PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; + } + + if (limit && node_boot_start >= limit) + return NULL; + end_pfn = bdata->node_low_pfn; limit = PFN_DOWN(limit); if (limit && end_pfn > limit) end_pfn = limit; - eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); - offset = 0; - if (align && (bdata->node_boot_start & (align - 1UL)) != 0) - offset = align - (bdata->node_boot_start & (align - 1UL)); - offset = PFN_DOWN(offset); + eidx = end_pfn - PFN_DOWN(node_boot_start); /* * We try to allocate bootmem pages above 'goal' @@ -240,15 +248,16 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, */ preferred = 0; if (goal && PFN_DOWN(goal) < end_pfn) { - if (goal > bdata->node_boot_start) - preferred = goal - bdata->node_boot_start; + if (goal > node_boot_start) + preferred = goal - node_boot_start; - if (bdata->last_success >= preferred) + if (bdata->last_success > node_boot_start && + bdata->last_success - node_boot_start >= preferred) if (!limit || (limit && limit > bdata->last_success)) - preferred = bdata->last_success; + preferred = bdata->last_success - node_boot_start; } - preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; + preferred = PFN_DOWN(ALIGN(preferred, align)); areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; incr = align >> PAGE_SHIFT ? 
: 1; @@ -256,18 +265,18 @@ restart_scan: for (i = preferred; i < eidx;) { unsigned long j; - i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); + i = find_next_zero_bit(node_bootmem_map, eidx, i); i = ALIGN(i, incr); if (i >= eidx) break; - if (test_bit(i, bdata->node_bootmem_map)) { + if (test_bit(i, node_bootmem_map)) { i += incr; continue; } for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; - if (test_bit(j, bdata->node_bootmem_map)) + if (test_bit(j, node_bootmem_map)) goto fail_block; } start = i; @@ -278,14 +287,14 @@ restart_scan: i += incr; } - if (preferred > offset) { - preferred = offset; + if (preferred > 0) { + preferred = 0; goto restart_scan; } return NULL; found: - bdata->last_success = PFN_PHYS(start); + bdata->last_success = PFN_PHYS(start) + node_boot_start; BUG_ON(start >= eidx); /* @@ -295,6 +304,7 @@ found: */ if (align < PAGE_SIZE && bdata->last_offset && bdata->last_pos+1 == start) { + unsigned long offset, remaining_size; offset = ALIGN(bdata->last_offset, align); BUG_ON(offset > PAGE_SIZE); remaining_size = PAGE_SIZE - offset; @@ -303,14 +313,12 @@ found: /* last_pos unchanged */ bdata->last_offset = offset + size; ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + - bdata->node_boot_start); + offset + node_boot_start); } else { remaining_size = size - remaining_size; areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + - offset + - bdata->node_boot_start); + offset + node_boot_start); bdata->last_pos = start + areasize - 1; bdata->last_offset = remaining_size; } @@ -318,14 +326,14 @@ found: } else { bdata->last_pos = start + areasize - 1; bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); + ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); } /* * Reserve the area now: */ for (i = start; i < start + areasize; i++) - if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) + if (unlikely(test_and_set_bit(i, node_bootmem_map))) BUG(); memset(ret, 0, size); return ret; -- cgit v1.2.3 From a5645a61b3b7e7d7de15e1a642ead600150ce94d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:49:12 -0700 Subject: mm: allow reserve_bootmem() cross nodes split reserve_bootmem_core() into two functions, one which checks conflicts, and one which sets the bits. and make reserve_bootmem to loop bdata_list to cross the nodes. user could be crashkernel and ramdisk..., in case the range provided by those externalities crosses the nodes. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- mm/bootmem.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 0f30bc873ec..b6791646143 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat, * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -static int __init reserve_bootmem_core(bootmem_data_t *bdata, +static int __init can_reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size, int flags) { unsigned long sidx, eidx; unsigned long i; - int ret; + + BUG_ON(!size); + + /* out of range, don't hold other */ + if (addr + size < bdata->node_boot_start || + PFN_DOWN(addr) > bdata->node_low_pfn) + return 0; /* - * round up, partially reserved pages are considered - * fully reserved. + * Round up to index to the range. 
*/ + if (addr > bdata->node_boot_start) + sidx= PFN_DOWN(addr - bdata->node_boot_start); + else + sidx = 0; + + eidx = PFN_UP(addr + size - bdata->node_boot_start); + if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) + eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + + for (i = sidx; i < eidx; i++) { + if (test_bit(i, bdata->node_bootmem_map)) { + if (flags & BOOTMEM_EXCLUSIVE) + return -EBUSY; + } + } + + return 0; + +} + +static void __init reserve_bootmem_core(bootmem_data_t *bdata, + unsigned long addr, unsigned long size, int flags) +{ + unsigned long sidx, eidx; + unsigned long i; + BUG_ON(!size); - BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); - BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); - BUG_ON(addr < bdata->node_boot_start); - sidx = PFN_DOWN(addr - bdata->node_boot_start); + /* out of range */ + if (addr + size < bdata->node_boot_start || + PFN_DOWN(addr) > bdata->node_low_pfn) + return; + + /* + * Round up to index to the range. + */ + if (addr > bdata->node_boot_start) + sidx= PFN_DOWN(addr - bdata->node_boot_start); + else + sidx = 0; + eidx = PFN_UP(addr + size - bdata->node_boot_start); + if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) + eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); - for (i = sidx; i < eidx; i++) + for (i = sidx; i < eidx; i++) { if (test_and_set_bit(i, bdata->node_bootmem_map)) { #ifdef CONFIG_DEBUG_BOOTMEM printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); #endif - if (flags & BOOTMEM_EXCLUSIVE) { - ret = -EBUSY; - goto err; - } } - - return 0; - -err: - /* unreserve memory we accidentally reserved */ - for (i--; i >= sidx; i--) - clear_bit(i, bdata->node_bootmem_map); - - return ret; + } } static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, @@ -415,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags) { + int ret; + + ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); + if (ret < 0) + return; reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); } @@ -440,7 +475,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) int __init reserve_bootmem(unsigned long addr, unsigned long size, int flags) { - return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags); + bootmem_data_t *bdata; + int ret; + + list_for_each_entry(bdata, &bdata_list, list) { + ret = can_reserve_bootmem_core(bdata, addr, size, flags); + if (ret < 0) + return ret; + } + list_for_each_entry(bdata, &bdata_list, list) + reserve_bootmem_core(bdata, addr, size, flags); + + return 0; } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -- cgit v1.2.3 From 8b3cd09ed23049fcb02479c6286744b36324ac9d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:50:21 -0700 Subject: x86_64: make reserve_bootmem_generic() use new reserve_bootmem() "mm: make reserve_bootmem can crossed the nodes" provides new reserve_bootmem(), let reserve_bootmem_generic() use that. reserve_bootmem_generic() is used to reserve initramdisk, so this way we can make sure even when bootloader or kexec load ranges cross the node memory boundaries, reserve_bootmem still works. 
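A toy illustration of the check this patch adds (hypothetical names and node layout, not the kernel code): only when the first and last byte of the range resolve to the same node is the per-node fast path safe; otherwise fall back to the node-crossing reserve_bootmem() introduced by the previous patch. The real hunk is in the diff below.

#include <stdio.h>

#define NODE_SIZE	0x100000000UL		/* pretend each node owns 4G */

static int phys_to_nid(unsigned long phys)
{
	return (int)(phys / NODE_SIZE);
}

static void reserve(unsigned long phys, unsigned long len)
{
	int nid = phys_to_nid(phys);
	int next_nid = phys_to_nid(phys + len - 1);

	if (nid == next_nid)
		printf("[%lx,+%lx): stays on node %d, per-node reserve is enough\n",
		       phys, len, nid);
	else
		printf("[%lx,+%lx): crosses nodes %d..%d, use reserve_bootmem()\n",
		       phys, len, nid, next_nid);
}

int main(void)
{
	reserve(0x080000000UL, 0x1000);		/* entirely inside node 0 */
	reserve(0x0ffff0000UL, 0x20000);	/* straddles node 0 and node 1 */
	return 0;
}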
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0cca6266303..7dc4fbc2d6b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -810,7 +810,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void __init reserve_bootmem_generic(unsigned long phys, unsigned len) { #ifdef CONFIG_NUMA - int nid = phys_to_nid(phys); + int nid, next_nid; #endif unsigned long pfn = phys >> PAGE_SHIFT; @@ -829,10 +829,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) /* Should check here against the e820 map to avoid double free */ #ifdef CONFIG_NUMA - reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + nid = phys_to_nid(phys); + next_nid = phys_to_nid(phys + len - 1); + if (nid == next_nid) + reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); + else + reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #else reserve_bootmem(phys, len, BOOTMEM_DEFAULT); #endif + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { dma_reserve += len / PAGE_SIZE; set_dma_reserve(dma_reserve); -- cgit v1.2.3 From 1a27fc0a42162964d758e9d36d2d1b49c082a67c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:52:37 -0700 Subject: x86_64: fix setup_node_bootmem to support big mem excluding with memmap typical case: four sockets system, every node has 4g ram, and we are using: memmap=10g$4g to mask out memory on node1 and node2 when numa is enabled, early_node_mem is used to get node_data and node_bootmap. if it can not get memory from the same node with find_e820_area(), it will use alloc_bootmem to get buff from previous nodes. so check it and print out some info about it. need to move early_res_to_bootmem into every setup_node_bootmem. and it takes range that node has. otherwise alloc_bootmem could return addr that reserved early. depends on "mm: make reserve_bootmem can crossed the nodes". 
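A toy version (hypothetical names) of the clamping that early_res_to_bootmem() gains below: each per-node call converts only the part of every early reservation that overlaps that node's own [start, end) range, so calling it from every setup_node_bootmem() touches only that node's pages.

#include <stdio.h>

struct early_res { unsigned long start, end; const char *name; };

static void early_res_to_bootmem(const struct early_res *res, int nr,
				 unsigned long start, unsigned long end)
{
	for (int i = 0; i < nr; i++) {
		unsigned long s = res[i].start > start ? res[i].start : start;
		unsigned long e = res[i].end   < end   ? res[i].end   : end;

		if (s >= e)
			continue;		/* no overlap with this node */
		printf("  early res: %d [%lx-%lx] %s\n", i, s, e - 1,
		       res[i].name);
		/* reserve_bootmem_generic(s, e - s) goes here in the real code */
	}
}

int main(void)
{
	struct early_res res[] = { { 0x3000000, 0x5000000, "example" } };

	early_res_to_bootmem(res, 1, 0x0000000, 0x4000000);	/* node 0's slice */
	early_res_to_bootmem(res, 1, 0x4000000, 0x8000000);	/* node 1's slice */
	return 0;
}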
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 13 +++++++++---- arch/x86/kernel/setup_64.c | 3 +-- arch/x86/mm/numa_64.c | 42 ++++++++++++++++++++++++++++++++++++------ include/asm-x86/e820_64.h | 2 +- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 79f0d52fa99..645ee5e32a2 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) early_res[j - 1].end = 0; } -void __init early_res_to_bootmem(void) +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b04e2c011e1..60e64c8eee9 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn<node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - /* Find a place for the bootmem map */ + /* + * Find a place for the bootmem map + * nodedata_phys could be on other nodes by alloc_bootmem, + * so need to sure bootmap_start not to be small, otherwise + * early_node_mem will get that with find_e820_area instead + * of alloc_bootmem, that could clash with reserved range + */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + nid = phys_to_nid(nodedata_phys); + if (nid == nodeid) + bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + else + bootmap_start = round_up(start, PAGE_SIZE); /* * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE @@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, free_bootmem_with_active_regions(nodeid, end); - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, - BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, - bootmap_pages< Date: Sat, 12 Apr 2008 01:19:24 -0700 Subject: x86_64/mm: check and print vmemmap allocation continuous On big systems with lots of memory, don't print out too much during bootup, and make it easy to find if it is continuous. 
on 256G 8 sockets system will get [ffffe20000000000-ffffe20002bfffff] PMD -> [ffff810001400000-ffff810003ffffff] on node 0 [ffffe2001c700000-ffffe2001c7fffff] potential offnode page_structs [ffffe20002c00000-ffffe2001c7fffff] PMD -> [ffff81000c000000-ffff8100255fffff] on node 0 [ffffe20038700000-ffffe200387fffff] potential offnode page_structs [ffffe2001c800000-ffffe200387fffff] PMD -> [ffff810820200000-ffff81083c1fffff] on node 1 [ffffe20040000000-ffffe2007fffffff] PUD ->ffff811027a00000 on node 2 [ffffe20038800000-ffffe2003fffffff] PMD -> [ffff811020200000-ffff8110279fffff] on node 2 [ffffe20054700000-ffffe200547fffff] potential offnode page_structs [ffffe20040000000-ffffe200547fffff] PMD -> [ffff811027c00000-ffff81103c3fffff] on node 2 [ffffe20070700000-ffffe200707fffff] potential offnode page_structs [ffffe20054800000-ffffe200707fffff] PMD -> [ffff811820200000-ffff81183c1fffff] on node 3 [ffffe20080000000-ffffe200bfffffff] PUD ->ffff81202fa00000 on node 4 [ffffe20070800000-ffffe2007fffffff] PMD -> [ffff812020200000-ffff81202f9fffff] on node 4 [ffffe2008c700000-ffffe2008c7fffff] potential offnode page_structs [ffffe20080000000-ffffe2008c7fffff] PMD -> [ffff81202fc00000-ffff81203c3fffff] on node 4 [ffffe200a8700000-ffffe200a87fffff] potential offnode page_structs [ffffe2008c800000-ffffe200a87fffff] PMD -> [ffff812820200000-ffff81283c1fffff] on node 5 [ffffe200c0000000-ffffe200ffffffff] PUD ->ffff813037a00000 on node 6 [ffffe200a8800000-ffffe200bfffffff] PMD -> [ffff813020200000-ffff8130379fffff] on node 6 [ffffe200c4700000-ffffe200c47fffff] potential offnode page_structs [ffffe200c0000000-ffffe200c47fffff] PMD -> [ffff813037c00000-ffff81303c3fffff] on node 6 [ffffe200c4800000-ffffe200e07fffff] PMD -> [ffff813820200000-ffff81383c1fffff] on node 7 instead of a very long print out... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 28 ++++++++++++++++++++++++++-- include/linux/mm.h | 1 + mm/sparse.c | 5 +++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7dc4fbc2d6b..5fbb8652cf5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -932,6 +932,10 @@ const char *arch_vma_name(struct vm_area_struct *vma) /* * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 
*/ +static long __meminitdata addr_start, addr_end; +static void __meminitdata *p_start, *p_end; +static int __meminitdata node_start; + int __meminit vmemmap_populate(struct page *start_page, unsigned long size, int node) { @@ -966,12 +970,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) PAGE_KERNEL_LARGE); set_pmd(pmd, __pmd(pte_val(entry))); - printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", - addr, addr + PMD_SIZE - 1, p, node); + /* check to see if we have contiguous blocks */ + if (p_end != p || node_start != node) { + if (p_start) + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + addr_start = addr; + node_start = node; + p_start = p; + } + addr_end = addr + PMD_SIZE; + p_end = p + PMD_SIZE; } else { vmemmap_verify((pte_t *)pmd, node, addr, next); } } return 0; } + +void __meminit vmemmap_populate_print_last(void) +{ + if (p_start) { + printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + addr_start, addr_end-1, p_start, p_end-1, node_start); + p_start = NULL; + p_end = NULL; + node_start = 0; + } +} #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index b695875d63e..286d3152160 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1229,6 +1229,7 @@ void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node); +void vmemmap_populate_print_last(void); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/sparse.c b/mm/sparse.c index 458109b99e6..7e9191381f8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -295,6 +295,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) return NULL; } +void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) +{ +} /* * Allocate the accumulated non-linear sections, allocate a mem_map * for each and record the physical to section mapping. @@ -345,6 +348,8 @@ void __init sparse_init(void) usemap); } + vmemmap_populate_print_last(); + free_bootmem(__pa(usemap_map), size); } -- cgit v1.2.3
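For reference, a standalone sketch of the coalescing idea from the last patch above (toy user-space code, hypothetical names): instead of printing one line per 2M mapping, remember where the current contiguous run started and only print when the next mapping is not adjacent to it or lands on a different node, then flush the final run at the end.

#include <stdio.h>

static unsigned long addr_start, addr_end;
static unsigned long p_start, p_end;
static int node_start = -1;

static void print_run(void)
{
	if (node_start >= 0)
		printf(" [%lx-%lx] -> [%lx-%lx] on node %d\n",
		       addr_start, addr_end - 1, p_start, p_end - 1, node_start);
}

static void map_one(unsigned long addr, unsigned long p,
		    unsigned long size, int node)
{
	/* new run if the backing memory or the node is not contiguous */
	if (p != p_end || node != node_start) {
		print_run();
		addr_start = addr;
		p_start = p;
		node_start = node;
	}
	addr_end = addr + size;
	p_end = p + size;
}

int main(void)
{
	map_one(0x000000, 0x400000, 0x200000, 0);	/* starts a run */
	map_one(0x200000, 0x600000, 0x200000, 0);	/* contiguous, no print */
	map_one(0x400000, 0xa00000, 0x200000, 1);	/* backing jumps: flush run */
	print_run();					/* vmemmap_populate_print_last() */
	return 0;
}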