From 8f0f996e80b980fba07d11961d96a5fefb60976a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: dont write protect kernel text Dynamic ftrace cant work when the kernel has its text write protected. This patch keeps the kernel from being write protected when dynamic ftrace is in place. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..f96eca21ad8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -710,6 +710,8 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; +#ifndef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); @@ -722,6 +724,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); #endif +#endif /* CONFIG_DYNAMIC_FTRACE */ + start += size; size = (unsigned long)__end_rodata - start; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -- cgit v1.2.3 From 4e50e62ce52f39b5f45e82f68b18254458b429bb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: eliminate duplicate consistency checks in init_32.c Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_32.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..4834c0fcb6b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -571,17 +571,6 @@ void __init mem_init(void) #endif bad_ppro = ppro_with_ram_bug(); -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } -#endif /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -614,7 +603,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM @@ -655,7 +643,6 @@ void __init mem_init(void) #endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); -- cgit v1.2.3 From d0ec2c6f2c2f0478b34ae78b3e65f60a561ac807 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Mon, 2 Jun 2008 14:26:18 +0800 Subject: x86: reserve highmem pages via reserve_early This patch makes early reserved highmem pages become reserved pages. This can be used for highmem pages allocated by bootloader such as EFI memory map, linked list of setup_data, etc. 
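For illustration, the gating this patch adds can be modeled in stand-alone C. This is only a sketch: the reserved-range table and its values are invented for the example, and just the name page_is_reserved_early mirrors the kernel helper.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical early-reservation table; the kernel fills its own
 * version of this via reserve_early() for EFI memmap/setup_data */
struct early_res { unsigned long start_pfn, end_pfn; };

static const struct early_res early_res[] = {
	{ 0x70000, 0x70040 },	/* pretend firmware data lives here */
};

static bool page_is_reserved_early(unsigned long pfn)
{
	for (unsigned int i = 0; i < sizeof(early_res)/sizeof(early_res[0]); i++)
		if (pfn >= early_res[i].start_pfn && pfn < early_res[i].end_pfn)
			return true;
	return false;
}

int main(void)
{
	/* mirrors the add_one_highpage_init() change: free only unreserved pages */
	for (unsigned long pfn = 0x6fffe; pfn < 0x70002; pfn++)
		printf("pfn %#lx -> %s\n", pfn,
		       page_is_reserved_early(pfn) ?
		       "keep PG_reserved" : "free to the allocator");
	return 0;
}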
Signed-off-by: Huang Ying Cc: andi@firstfloor.org Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..0e7bb5e8167 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -289,7 +289,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { + if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) && + !page_is_reserved_early(pfn)) { ClearPageReserved(page); init_page_count(page); __free_page(page); -- cgit v1.2.3 From ce0c0e50f94e8c55b00a722e8c6e8d6c802be211 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 May 2008 11:46:49 +0200 Subject: x86, generic: CPA add statistics about state of direct mapping v4 Add information about the mapping state of the direct mapping to /proc/meminfo. I chose /proc/meminfo because that is where all the other memory statistics are too and it is a generally useful metric even outside debugging situations. A lot of split kernel pages means the kernel will run slower. This way we can see how many large pages are really used for it and how many are split. Useful for general insight into the kernel. v2: Add hotplug locking to 64bit to plug a very obscure theoretical race. 32bit doesn't need it because it doesn't support hotadd for lowmem. Fix some typos v3: Rename dpages_cnt Add CONFIG ifdef for count update as requested by tglx Expand description v4: Fix stupid bugs added in v3 Move update_page_count to pageattr.c Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..0269ac230bf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pgd_t *pgd; pmd_t *pmd; pte_t *pte; + unsigned pages_2m = 0, pages_4k = 0; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; @@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; + pages_2m++; set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; @@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) if (is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; + pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } max_pfn_mapped = pfn; } } + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } static inline int page_kills_ppro(unsigned long pagenr) -- cgit v1.2.3 From d2dbf343329dc777d77488743465f7be4245971d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 02:00:56 -0700 Subject: x86: clean up reserve_bootmem_generic() and port it to 32-bit 1. add reserve_bootmem_generic for 32bit 2. change len to unsigned long 3. 
make early_res_to_bootmem to use it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e7bb5e8167..abadb1da70d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -785,3 +785,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) free_init_pages("initrd memory", start, end); } #endif + +int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) +{ + return reserve_bootmem(phys, len, flags); +} -- cgit v1.2.3 From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Jun 2008 18:32:52 -0700 Subject: x86, mm: use add_highpages_with_active_regions() for high pages init v2 use early_node_map to init high pages, so we can remove page_is_ram() and page_is_reserved_early() in the big loop with add_one_highpage also remove page_is_reserved_early(), it is not needed anymore. v2: fix the build of other platforms Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 62 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 11 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index abadb1da70d..ba07a489230 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -287,10 +287,10 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __init +add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) && - !page_is_reserved_early(pfn)) { + if (!(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -299,18 +299,58 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } +struct add_highpages_data { + unsigned long start_pfn; + unsigned long end_pfn; + int bad_ppro; +}; + +static void __init add_highpages_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + int node_pfn; + struct page *page; + unsigned long final_start_pfn, final_end_pfn; + struct add_highpages_data *data; + int bad_ppro; + + data = (struct add_highpages_data *)datax; + bad_ppro = data->bad_ppro; + + final_start_pfn = max(start_pfn, data->start_pfn); + final_end_pfn = min(end_pfn, data->end_pfn); + if (final_start_pfn >= final_end_pfn) + return; + + for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; + node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page, node_pfn, bad_ppro); + } + +} + +void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn, + int bad_ppro) +{ + struct add_highpages_data data; + + data.start_pfn = start_pfn; + data.end_pfn = end_pfn; + data.bad_ppro = bad_ppro; + + work_with_active_regions(nid, add_highpages_work_fn, &data); +} + #ifndef CONFIG_NUMA static void __init set_highmem_pages_init(int bad_ppro) { - int pfn; + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn, + bad_ppro); - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { - /* - * Holes under sparsemem might not have no mem_map[]: - */ - if (pfn_valid(pfn)) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); - } totalram_pages 
+= totalhigh_pages; } #endif /* !CONFIG_NUMA */ -- cgit v1.2.3 From cc9f7a0ccf000d4db5fbdc7b0ae48eefea102f69 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 16:11:08 -0700 Subject: x86: kill bad_ppro so don't punish all other cpus without that problem when init highmem Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 43 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ba07a489230..fb5694d788b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -220,13 +220,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } } -static inline int page_kills_ppro(unsigned long pagenr) -{ - if (pagenr >= 0x70000 && pagenr <= 0x7003F) - return 1; - return 0; -} - /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. @@ -287,22 +280,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -static void __init -add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __init add_one_highpage_init(struct page *page, int pfn) { - if (!(bad_ppro && page_kills_ppro(pfn))) { - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } else - SetPageReserved(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + totalhigh_pages++; } struct add_highpages_data { unsigned long start_pfn; unsigned long end_pfn; - int bad_ppro; }; static void __init add_highpages_work_fn(unsigned long start_pfn, @@ -312,10 +300,8 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, struct page *page; unsigned long final_start_pfn, final_end_pfn; struct add_highpages_data *data; - int bad_ppro; data = (struct add_highpages_data *)datax; - bad_ppro = data->bad_ppro; final_start_pfn = max(start_pfn, data->start_pfn); final_end_pfn = min(end_pfn, data->end_pfn); @@ -327,29 +313,26 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, if (!pfn_valid(node_pfn)) continue; page = pfn_to_page(node_pfn); - add_one_highpage_init(page, node_pfn, bad_ppro); + add_one_highpage_init(page, node_pfn); } } void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn, - int bad_ppro) + unsigned long end_pfn) { struct add_highpages_data data; data.start_pfn = start_pfn; data.end_pfn = end_pfn; - data.bad_ppro = bad_ppro; work_with_active_regions(nid, add_highpages_work_fn, &data); } #ifndef CONFIG_NUMA -static void __init set_highmem_pages_init(int bad_ppro) +static void __init set_highmem_pages_init(void) { - add_highpages_with_active_regions(0, highstart_pfn, highend_pfn, - bad_ppro); + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); totalram_pages += totalhigh_pages; } @@ -358,7 +341,7 @@ static void __init set_highmem_pages_init(int bad_ppro) #else # define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init(bad_ppro) do { } while (0) +# define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ pteval_t __PAGE_KERNEL = _PAGE_KERNEL; @@ -605,13 +588,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) { int codesize, reservedpages, datasize, initsize; - int tmp, bad_ppro; + int tmp; #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - bad_ppro 
= ppro_with_ram_bug(); - #ifdef CONFIG_HIGHMEM /* check that fixmap and pkmap do not overlap */ if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { @@ -634,7 +615,7 @@ void __init mem_init(void) if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) reservedpages++; - set_highmem_pages_init(bad_ppro); + set_highmem_pages_init(); codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; -- cgit v1.2.3 From d52d53b8a5b258bfaab9223a5e7284fcfdd48577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 20:10:55 -0700 Subject: RFC x86: try to remove arch_get_ram_range want to remove arch_get_ram_range, and use early_node_map instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 65d55056b6e..a0484adbf59 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -298,7 +298,7 @@ struct add_highpages_data { unsigned long end_pfn; }; -static void __init add_highpages_work_fn(unsigned long start_pfn, +static int __init add_highpages_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax) { int node_pfn; @@ -311,7 +311,7 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, final_start_pfn = max(start_pfn, data->start_pfn); final_end_pfn = min(end_pfn, data->end_pfn); if (final_start_pfn >= final_end_pfn) - return; + return 0; for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; node_pfn++) { @@ -321,6 +321,8 @@ static void __init add_highpages_work_fn(unsigned long start_pfn, add_one_highpage_init(page, node_pfn); } + return 0; + } void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, -- cgit v1.2.3 From b2ac82a0909aea0d2620ba4c189f37c567c21fe5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Jun 2008 02:45:39 -0700 Subject: x86: introduce initmem_init for 32 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index a0484adbf59..9bc8607d798 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -540,6 +540,96 @@ static void __init set_nx(void) } #endif +#ifndef CONFIG_NEED_MULTIPLE_NODES +extern unsigned long find_max_low_pfn(void); +unsigned long __init initmem_init(unsigned long start_pfn, + unsigned long end_pfn) +{ + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + min_low_pfn = PFN_UP(init_pg_tables_end); + + max_low_pfn = find_max_low_pfn(); + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > max_low_pfn) + highstart_pfn = max_low_pfn; + memory_present(0, 0, highend_pfn); + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + memory_present(0, 0, max_low_pfn); + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + + setup_bootmem_allocator(); + + return max_low_pfn; +} + +void __init zone_sizes_init(void) +{ + unsigned long 
max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + remove_all_active_ranges(); +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; + e820_register_active_regions(0, 0, highend_pfn); +#else + e820_register_active_regions(0, 0, max_low_pfn); +#endif + + free_area_init_nodes(max_zone_pfns); +} +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ + +extern void reserve_initrd(void); + +void __init setup_bootmem_allocator(void) +{ + int i; + unsigned long bootmap_size, bootmap; + /* + * Initialize the boot-time allocator (with low memory only): + */ + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, + max_pfn_mapped<<PAGE_SHIFT, bootmap_size, + PAGE_SIZE); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n", bootmap_size); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); +#ifdef CONFIG_BLK_DEV_INITRD + reserve_initrd(); +#endif + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", + max_pfn_mapped<<PAGE_SHIFT); + printk(KERN_INFO " low ram: %08lx - %08lx\n", + min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); + printk(KERN_INFO " bootmap %08lx - %08lx\n", + bootmap, bootmap + bootmap_size); + for (i = 0; i < e820.nr_map; i++) + free_bootmem_with_active_regions(i, max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); +} -- cgit v1.2.3 From ... Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Jun 2008 02:46:58 -0700 Subject: x86: introduce reserve_initrd Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9bc8607d798..98080782ee4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -597,8 +597,6 @@ void __init zone_sizes_init(void) } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -extern void reserve_initrd(void); - void __init setup_bootmem_allocator(void) { int i; @@ -613,9 +611,9 @@ void __init setup_bootmem_allocator(void) if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); -#ifdef CONFIG_BLK_DEV_INITRD + reserve_initrd(); -#endif + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped<<PAGE_SHIFT); -- cgit v1.2.3 From ... Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 23 Jun 2008 21:00:45 +0200 Subject: x86: move find_max_low_pfn to init_32.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 98080782ee4..d1017336f1b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -540,8 +540,95 @@ static void __init set_nx(void) } #endif +/* user-defined highmem size */ +static unsigned int highmem_pages = -1; + +/* + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. 
+ */ +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; + + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); + +/* + * Determine low and high memory ranges: + */ +unsigned long __init find_max_low_pfn(void) +{ + unsigned long max_low_pfn; + + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + if (highmem_pages == -1) + highmem_pages = max_pfn - MAXMEM_PFN; + if (highmem_pages + MAXMEM_PFN < max_pfn) + max_pfn = MAXMEM_PFN + highmem_pages; + if (highmem_pages + MAXMEM_PFN > max_pfn) { + printk(KERN_WARNING "only %luMB highmem pages " + "available, ignoring highmem size of %uMB.\n", + pages_to_mb(max_pfn - MAXMEM_PFN), + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING + "Use a HIGHMEM64G enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + max_pfn = MAXMEM_PFN; +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_HIGHMEM64G + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used." + "Use a HIGHMEM64G enabled kernel.\n"); + } +#endif /* !CONFIG_HIGHMEM64G */ +#endif /* !CONFIG_HIGHMEM */ + } else { + if (highmem_pages == -1) + highmem_pages = 0; +#ifdef CONFIG_HIGHMEM + if (highmem_pages >= max_pfn) { + printk(KERN_ERR "highmem size specified (%uMB) is " + "bigger than pages available (%luMB)!.\n", + pages_to_mb(highmem_pages), + pages_to_mb(max_pfn)); + highmem_pages = 0; + } + if (highmem_pages) { + if (max_low_pfn - highmem_pages < + 64*1024*1024/PAGE_SIZE){ + printk(KERN_ERR "highmem size %uMB results in " + "smaller than 64MB lowmem, ignoring it.\n" + , pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn -= highmem_pages; + } +#else + if (highmem_pages) + printk(KERN_ERR "ignoring highmem size on non-highmem" + " kernel!\n"); +#endif + } + return max_low_pfn; +} + #ifndef CONFIG_NEED_MULTIPLE_NODES -extern unsigned long find_max_low_pfn(void); unsigned long __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { -- cgit v1.2.3 From 2ec65f8b89ea003c27ff7723525a2ee335a2b393 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 23 Jun 2008 03:05:30 -0700 Subject: x86: clean up using max_low_pfn on 32-bit so that max_low_pfn is not changed after it is set. so we can move that early and out of initmem_init. could call find_low_pfn_range just after max_pfn is set. also could move reserve_initrd out of setup_bootmem_allocator so 32bit is more like 64bit. 
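To make the lowmem/highmem split concrete, here is a stand-alone sketch of the default case computed above. The 896MB MAXMEM figure is an assumption (the usual 3G/1G split), not something these patches define.

#include <stdio.h>

#define PAGE_SHIFT	12
#define MB		(1024UL * 1024UL)
#define MAXMEM_PFN	((896 * MB) >> PAGE_SHIFT)	/* assumed 3G/1G split */

int main(void)
{
	unsigned long max_pfn = (2048 * MB) >> PAGE_SHIFT;	/* a 2GB box */
	unsigned long max_low_pfn = max_pfn;
	unsigned long highmem_pages = 0;

	if (max_low_pfn > MAXMEM_PFN) {
		/* default: everything beyond directly mapped lowmem is highmem */
		highmem_pages = max_pfn - MAXMEM_PFN;
		max_low_pfn = MAXMEM_PFN;
	}
	printf("lowmem: %luMB, highmem: %luMB\n",
	       (max_low_pfn << PAGE_SHIFT) / MB,
	       (highmem_pages << PAGE_SHIFT) / MB);
	return 0;
}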
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d1017336f1b..27b82931294 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -561,9 +561,15 @@ early_param("highmem", parse_highmem); /* * Determine low and high memory ranges: */ -unsigned long __init find_max_low_pfn(void) +void __init find_low_pfn_range(void) { - unsigned long max_low_pfn; + /* it could update max_pfn */ + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + min_low_pfn = PFN_UP(init_pg_tables_end); max_low_pfn = max_pfn; if (max_low_pfn > MAXMEM_PFN) { @@ -625,21 +631,12 @@ unsigned long __init find_max_low_pfn(void) " kernel!\n"); #endif } - return max_low_pfn; } #ifndef CONFIG_NEED_MULTIPLE_NODES -unsigned long __init initmem_init(unsigned long start_pfn, +void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) { - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - min_low_pfn = PFN_UP(init_pg_tables_end); - - max_low_pfn = find_max_low_pfn(); - #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) @@ -661,8 +658,6 @@ unsigned long __init initmem_init(unsigned long start_pfn, pages_to_mb(max_low_pfn)); setup_bootmem_allocator(); - - return max_low_pfn; } void __init zone_sizes_init(void) @@ -699,8 +694,6 @@ void __init setup_bootmem_allocator(void) panic("Cannot find bootmem map of size %ld\n", bootmap_size); reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); - reserve_initrd(); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped< Date: Mon, 23 Jun 2008 03:06:14 -0700 Subject: x86: clean up min_low_pfn for 32bit we already had early_res support, so don't need to track min_low_pfn. keep it to 0 always. also use init_bootmem_node instead of init_bootmem, so don't touch min_low_pfn. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 27b82931294..9bb35cf8bc4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -565,11 +565,7 @@ void __init find_low_pfn_range(void) { /* it could update max_pfn */ - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - min_low_pfn = PFN_UP(init_pg_tables_end); + /* max_low_pfn is 0, we already have early_res support */ max_low_pfn = max_pfn; if (max_low_pfn > MAXMEM_PFN) { @@ -694,7 +690,9 @@ void __init setup_bootmem_allocator(void) panic("Cannot find bootmem map of size %ld\n", bootmap_size); reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); + /* don't touch min_low_pfn */ + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, + min_low_pfn, max_low_pfn); printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped< Date: Mon, 23 Jun 2008 19:51:10 -0700 Subject: x86: move some func calling from setup_arch to paging_init those function depend on paging setup pgtable, so they could access the ram in bootmem region but just get mapped. 
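The resulting boot ordering can be sketched with plain C stubs; the function names follow the patch below, but none of this is kernel code.

#include <stdio.h>

static void pagetable_init(void)	{ puts("1. build kernel page tables"); }
static void kmap_init(void)		{ puts("2. set up kmap pte slots"); }
static void post_reserve_initrd(void)	{ puts("3. touch initrd pages (RAM now mapped)"); }
static void remapped_pgdat_init(void)	{ puts("4. clear node>0 pgdats"); }
static void sparse_init(void)		{ puts("5. build sparsemem section maps"); }
static void zone_sizes_init(void)	{ puts("6. size zones, free_area_init_nodes()"); }

/* paging_init() after this patch: everything past kmap_init() may touch
 * bootmem-backed RAM, because the page tables built above already map it */
int main(void)
{
	pagetable_init();
	kmap_init();
	post_reserve_initrd();
	remapped_pgdat_init();
	sparse_init();
	zone_sizes_init();
	return 0;
}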
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9bb35cf8bc4..20ca29591ab 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -705,6 +705,23 @@ void __init setup_bootmem_allocator(void) } +/* + * The node 0 pgdat is initialized before all of these because + * it's needed for bootmem. node>0 pgdats have their virtual + * space allocated before the pagetables are in place to access + * them, so they can't be cleared then. + * + * This should all compile down to nothing when NUMA is off. + */ +static void __init remapped_pgdat_init(void) +{ + int nid; + + for_each_online_node(nid) { + if (nid != 0) + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + } +} /* * paging_init() sets up the page tables - note that the first 8MB are @@ -727,6 +744,18 @@ void __init paging_init(void) __flush_tlb_all(); kmap_init(); + + /* + * NOTE: at this point the bootmem allocator is fully available. + */ + + post_reserve_initrd(); + + remapped_pgdat_init(); + sparse_init(); + zone_sizes_init(); + + paravirt_post_allocator_init(); } /* -- cgit v1.2.3 From 4e29684c40f2a332ba4d05f6482d5807725d5624 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Jun 2008 12:18:14 -0700 Subject: x86: introduce init_memory_mapping for 32bit #1 ... so can we use mem below max_low_pfn earlier. this allows us to move several functions more early instead of waiting to after paging_init. That includes moving relocate_initrd() earlier in the bootup, and kva related early setup done in initmem_init. (in followup patches) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 141 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 112 insertions(+), 29 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 20ca29591ab..619058e6bff 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -57,6 +57,27 @@ unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); + +static unsigned long __initdata table_start; +static unsigned long __meminitdata table_end; +static unsigned long __meminitdata table_top; + +static int __initdata after_init_bootmem; + +static __init void *alloc_low_page(unsigned long *phys) +{ + unsigned long pfn = table_end++; + void *adr; + + if (pfn >= table_top) + panic("alloc_low_page: ran out of memory"); + + adr = __va(pfn * PAGE_SIZE); + memset(adr, 0, PAGE_SIZE); + *phys = pfn * PAGE_SIZE; + return adr; +} + /* * Creates a middle page table and puts a pointer to it in the * given global directory entry. 
This only returns the gd entry @@ -68,9 +89,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) pmd_t *pmd_table; #ifdef CONFIG_X86_PAE + unsigned long phys; if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - + if (after_init_bootmem) + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + else + pmd_table = (pmd_t *)alloc_low_page(&phys); paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); @@ -92,12 +116,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = NULL; + if (after_init_bootmem) { #ifdef CONFIG_DEBUG_PAGEALLOC - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif - if (!page_table) { - page_table = + if (!page_table) + page_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + } else { + unsigned long phys; + page_table = (pte_t *)alloc_low_page(&phys); } paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); @@ -155,7 +183,9 @@ static inline int is_kernel_text(unsigned long addr) * of max_low_pfn pages, by creating page tables starting from address * PAGE_OFFSET: */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +static void __init kernel_physical_mapping_init(pgd_t *pgd_base, + unsigned long start, + unsigned long end) { int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; @@ -163,18 +193,19 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pmd_t *pmd; pte_t *pte; unsigned pages_2m = 0, pages_4k = 0; + unsigned limit_pfn = end >> PAGE_SHIFT; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; - pfn = 0; + pfn = start >> PAGE_SHIFT; for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) + if (pfn >= limit_pfn) continue; for (pmd_idx = 0; - pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; + pmd_idx < PTRS_PER_PMD && pfn < limit_pfn; pmd++, pmd_idx++) { unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; @@ -418,20 +449,7 @@ static void __init pagetable_init(void) paravirt_pagetable_setup_start(pgd_base); - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; - } - - kernel_physical_mapping_init(pgd_base); remap_numa_kva(); - /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): @@ -703,6 +721,7 @@ void __init setup_bootmem_allocator(void) free_bootmem_with_active_regions(i, max_low_pfn); early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); + after_init_bootmem = 1; } ... +static void __init find_early_table_space(unsigned long end) +{ + unsigned long puds, pmds, tables, start; + + puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; + tables = PAGE_ALIGN(puds * sizeof(pud_t)); + + pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); + + /* + * RED-PEN putting page tables only on node 0 could + * cause a hotspot and fill up ZONE_DMA. The page tables + * need roughly 0.5KB per GB. 
+ */ + start = 0x7000; + table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, + tables, PAGE_SIZE); + if (table_start == -1UL) + panic("Cannot find space for the kernel page tables"); + + table_start >>= PAGE_SHIFT; + table_end = table_start; + table_top = table_start + (tables>>PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, + (table_start << PAGE_SHIFT) + tables); +} + +unsigned long __init_refok init_memory_mapping(unsigned long start, + unsigned long end) +{ + pgd_t *pgd_base = swapper_pg_dir; + + /* + * Find space for the kernel direct mapping tables. + */ + if (!after_init_bootmem) + find_early_table_space(end); + +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +#endif + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __PAGE_KERNEL |= _PAGE_GLOBAL; + __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; + } + + kernel_physical_mapping_init(pgd_base, start, end); + + load_cr3(swapper_pg_dir); + + __flush_tlb_all(); + + if (!after_init_bootmem) + reserve_early(table_start << PAGE_SHIFT, + table_end << PAGE_SHIFT, "PGTABLE"); + + return end >> PAGE_SHIFT; +} + /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. @@ -732,15 +822,8 @@ static void __init remapped_pgdat_init(void) */ void __init paging_init(void) { -#ifdef CONFIG_X86_PAE - set_nx(); - if (nx_enabled) - printk(KERN_INFO "NX (Execute Disable) protection: active\n"); -#endif pagetable_init(); - load_cr3(swapper_pg_dir); - __flush_tlb_all(); kmap_init(); -- cgit v1.2.3 From cfb0e53b05402f1ce65053677409a819c1798d34 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Jun 2008 12:18:58 -0700 Subject: x86: introduce init_memory_mapping for 32bit #2 moving relocate_initrd early Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 619058e6bff..ecccb055b08 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -831,9 +831,6 @@ void __init paging_init(void) /* * NOTE: at this point the bootmem allocator is fully available. */ - - post_reserve_initrd(); - remapped_pgdat_init(); sparse_init(); zone_sizes_init(); -- cgit v1.2.3 From 3a58a2a6c879b2e47daafd6e641661c50ac9da5a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Jun 2008 12:19:41 -0700 Subject: x86: introduce init_memory_mapping for 32bit #3 move kva related early backto initmem_init for numa32 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ecccb055b08..c059c460e32 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -449,7 +449,6 @@ static void __init pagetable_init(void) paravirt_pagetable_setup_start(pgd_base); - remap_numa_kva(); /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): @@ -724,24 +723,6 @@ void __init setup_bootmem_allocator(void) after_init_bootmem = 1; } -/* - * The node 0 pgdat is initialized before all of these because - * it's needed for bootmem. node>0 pgdats have their virtual - * space allocated before the pagetables are in place to access - * them, so they can't be cleared then. - * - * This should all compile down to nothing when NUMA is off. 
- */ -static void __init remapped_pgdat_init(void) -{ - int nid; - - for_each_online_node(nid) { - if (nid != 0) - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - } -} - static void __init find_early_table_space(unsigned long end) { unsigned long puds, pmds, tables, start; @@ -831,7 +812,6 @@ void __init paging_init(void) /* * NOTE: at this point the bootmem allocator is fully available. */ - remapped_pgdat_init(); sparse_init(); zone_sizes_init(); -- cgit v1.2.3 From 8207c2570af6f819b61be9ef3fb298d0a8c0e18c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Jun 2008 17:32:48 -0400 Subject: x86: fix pte allocation in "x86: introduce init_memory_mapping for 32bit" The patch "x86: introduce init_memory_mapping for 32bit" does not allocate enough space for PTEs if the CPU does not implement PSE. Signed-off-by: Jeremy Fitzhardinge Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c059c460e32..156000de6e6 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -733,6 +733,11 @@ static void __init find_early_table_space(unsigned long end) pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); + if (!cpu_has_pse) { + int ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += PAGE_ALIGN(ptes * sizeof(pte_t)); + } + /* * RED-PEN putting page tables only on node 0 could * cause a hotspot and fill up ZONE_DMA. The page tables -- cgit v1.2.3 From e7b3789524eecc96213dd69d6686efd429235051 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Jun 2008 21:51:28 -0700 Subject: x86: move fix mapping page table range early do that in init_memory_mapping also remove one init_ohci1394_dma_on_all_controllers Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 156000de6e6..b9cf7f70530 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -442,13 +442,10 @@ void __init native_pagetable_setup_done(pgd_t *base) * be partially populated, and so it avoids stomping on any existing * mappings. */ -static void __init pagetable_init(void) +static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) { - pgd_t *pgd_base = swapper_pg_dir; unsigned long vaddr, end; - paravirt_pagetable_setup_start(pgd_base); - /* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): @@ -458,6 +455,13 @@ static void __init pagetable_init(void) end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; page_table_range_init(vaddr, end, pgd_base); early_ioremap_reset(); +} + +static void __init pagetable_init(void) +{ + pgd_t *pgd_base = swapper_pg_dir; + + paravirt_pagetable_setup_start(pgd_base); permanent_kmaps_init(pgd_base); @@ -788,6 +792,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, kernel_physical_mapping_init(pgd_base, start, end); + early_ioremap_page_table_range_init(pgd_base); + load_cr3(swapper_pg_dir); __flush_tlb_all(); @@ -799,6 +805,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, return end >> PAGE_SHIFT; } + /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. 
-- cgit v1.2.3 From 7482b0e962e128c5b574aa29761f97164189ef14 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 28 Jun 2008 03:30:39 -0700 Subject: x86: fix init_memory_mapping over boundary v3 some ram-end boundary only has page alignment, instead of 2M alignment. v2: make init_memory_mapping more solid: start could be any value other than 0 v3: fix NON PAE by handling left over in kernel_physical_mapping Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b9cf7f70530..90ca67be965 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -195,7 +195,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, unsigned pages_2m = 0, pages_4k = 0; unsigned limit_pfn = end >> PAGE_SHIFT; - pgd_idx = pgd_index(PAGE_OFFSET); + pgd_idx = pgd_index(start + PAGE_OFFSET); pgd = pgd_base + pgd_idx; pfn = start >> PAGE_SHIFT; @@ -218,7 +218,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, * and overlapping MTRRs into large pages can cause * slowdowns. */ - if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { + if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0) && + (pfn + PTRS_PER_PTE) <= limit_pfn) { unsigned int addr2; pgprot_t prot = PAGE_KERNEL_LARGE; @@ -233,13 +234,12 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; - max_pfn_mapped = pfn; continue; } pte = one_page_table_init(pmd); for (pte_ofs = 0; - pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; + pte_ofs < PTRS_PER_PTE && pfn < limit_pfn; pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { pgprot_t prot = PAGE_KERNEL; @@ -249,7 +249,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } - max_pfn_mapped = pfn; } } update_page_count(PG_LEVEL_2M, pages_2m); @@ -729,7 +728,7 @@ void __init setup_bootmem_allocator(void) static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, tables, start; + unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = PAGE_ALIGN(puds * sizeof(pud_t)); @@ -737,10 +736,15 @@ static void __init find_early_table_space(unsigned long end) pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); - if (!cpu_has_pse) { - int ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += PAGE_ALIGN(ptes * sizeof(pte_t)); - } + if (cpu_has_pse) { + unsigned long extra; + extra = end - ((end>>21) << 21); + extra += (2UL<<20); + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + + tables += PAGE_ALIGN(ptes * sizeof(pte_t)); /* * RED-PEN putting page tables only on node 0 could -- cgit v1.2.3 From a04ad82d0bff4bb564f290eb50982e02458592d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 29 Jun 2008 00:39:06 -0700 Subject: x86: fix init_memory_mapping over boundary, v4 use PMD_SHIFT to calculate boundary also adjust size for pre-allocated table size Signed-off-by: Yinghai Lu Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 89 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 22 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 90ca67be965..aa5e37c9f4b 100644 --- 
a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -184,8 +184,9 @@ static inline int is_kernel_text(unsigned long addr) * PAGE_OFFSET: */ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, - unsigned long start, - unsigned long end) + unsigned long start_pfn, + unsigned long end_pfn, + int use_pse) { int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; @@ -193,33 +194,33 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, pmd_t *pmd; pte_t *pte; unsigned pages_2m = 0, pages_4k = 0; - unsigned limit_pfn = end >> PAGE_SHIFT; - pgd_idx = pgd_index(start + PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = start >> PAGE_SHIFT; + if (!cpu_has_pse) + use_pse = 0; + pfn = start_pfn; + pgd_idx = pgd_index((pfn<= limit_pfn) - continue; - for (pmd_idx = 0; - pmd_idx < PTRS_PER_PMD && pfn < limit_pfn; + if (pfn >= end_pfn) + continue; +#ifdef CONFIG_X86_PAE + pmd_idx = pmd_index((pfn<>21) << 21); - extra += (2UL<<20); + + extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + extra += PMD_SIZE; ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; tables += PAGE_ALIGN(ptes * sizeof(pte_t)); + /* for fixmap */ + tables += PAGE_SIZE * 2; + /* * RED-PEN putting page tables only on node 0 could * cause a hotspot and fill up ZONE_DMA. The page tables @@ -770,6 +776,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) { pgd_t *pgd_base = swapper_pg_dir; + unsigned long start_pfn, end_pfn; + unsigned long big_page_start; /* * Find space for the kernel direct mapping tables. @@ -794,7 +802,44 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; } - kernel_physical_mapping_init(pgd_base, start, end); + /* + * Don't use a large page for the first 2/4MB of memory + * because there are often fixed size MTRRs in there + * and overlapping MTRRs into large pages can cause + * slowdowns. + */ + big_page_start = PMD_SIZE; + + if (start < big_page_start) { + start_pfn = start >> PAGE_SHIFT; + end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); + } else { + /* head is not big page alignment ? */ + start_pfn = start >> PAGE_SHIFT; + end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + } + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); + + /* big page range */ + start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < (big_page_start >> PAGE_SHIFT)) + start_pfn = big_page_start >> PAGE_SHIFT; + end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, + cpu_has_pse); + + /* tail is not big page alignment ? */ + start_pfn = end_pfn; + if (start_pfn > (big_page_start>>PAGE_SHIFT)) { + end_pfn = end >> PAGE_SHIFT; + if (start_pfn < end_pfn) + kernel_physical_mapping_init(pgd_base, start_pfn, + end_pfn, 0); + } early_ioremap_page_table_range_init(pgd_base); -- cgit v1.2.3 From cb95a13a8ace8612ecab042a838e5aab2ec14ef0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 2 Jul 2008 00:31:02 -0700 Subject: x86: merge zones_sizes_init for numa and non numa on 32-bit move out e820_register_active_regions from non numa zones_sizes_init() and remove numa version zones_sizes_init(). 
and let 32 bit call remove_all_active_ranges() in setup_arch() directly like 64-bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index aa5e37c9f4b..8efe872b961 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -660,12 +660,14 @@ void __init initmem_init(unsigned long start_pfn, if (max_pfn > max_low_pfn) highstart_pfn = max_low_pfn; memory_present(0, 0, highend_pfn); + e820_register_active_regions(0, 0, highend_pfn); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else memory_present(0, 0, max_low_pfn); + e820_register_active_regions(0, 0, max_low_pfn); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif @@ -677,25 +679,21 @@ void __init initmem_init(unsigned long start_pfn, setup_bootmem_allocator(); } +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ -void __init zone_sizes_init(void) +static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - remove_all_active_ranges(); #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; - e820_register_active_regions(0, 0, highend_pfn); -#else - e820_register_active_regions(0, 0, max_low_pfn); #endif free_area_init_nodes(max_zone_pfns); } -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __init setup_bootmem_allocator(void) { -- cgit v1.2.3 From ef5e94af16c0c82452e1ea5d387e1203dd2198d6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 1 Jul 2008 16:46:36 -0700 Subject: x86_32: remove __PAGE_KERNEL(_EXEC) From: Jeremy Fitzhardinge Older x86-32 processors do not support global mappings (PGD), so must only use it if the processor supports it. The _PAGE_KERNEL* flags always have _PAGE_KERNEL set, since logically we always want it set. This is OK even on processors which do not support PGD, since all _PAGE flags are masked with __supported_pte_mask before being turned into a real in-pagetable pte. On 32-bit systems, __supported_pte_mask is initialized to not contain _PAGE_GLOBAL, and it is then added if the CPU is found to support it. The x86-32 code used to use __PAGE_KERNEL/__PAGE_KERNEL_EXEC for this purpose, but they're now redundant and can be removed. 
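For illustration, a stand-alone model of that masking: bit positions mimic x86, but make_pte() is an invented helper, and in the kernel the mask is applied when pgprot bits are folded into real ptes.

#include <stdio.h>

typedef unsigned long long pteval_t;

#define _PAGE_PRESENT	(1ULL << 0)
#define _PAGE_RW	(1ULL << 1)
#define _PAGE_GLOBAL	(1ULL << 8)
#define _PAGE_NX	(1ULL << 63)

/* boot-time default after this patch: GLOBAL (and NX) start masked off */
static pteval_t __supported_pte_mask = ~(_PAGE_NX | _PAGE_GLOBAL);

static pteval_t make_pte(pteval_t phys, pteval_t flags)
{
	/* unsupported bits simply drop out of the final pte */
	return (phys | flags) & __supported_pte_mask;
}

int main(void)
{
	pteval_t flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL;

	printf("no PGE:   %#llx\n", make_pte(0x1000, flags));
	__supported_pte_mask |= _PAGE_GLOBAL;	/* the cpu_has_pge path */
	printf("with PGE: %#llx\n", make_pte(0x1000, flags));
	return 0;
}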
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8efe872b961..b5a0fd5f4c5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -383,11 +383,6 @@ static void __init set_highmem_pages_init(void) # define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ -pteval_t __PAGE_KERNEL = _PAGE_KERNEL; -EXPORT_SYMBOL(__PAGE_KERNEL); - -pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; - void __init native_pagetable_setup_start(pgd_t *base) { unsigned long pfn, va; @@ -509,7 +504,7 @@ void zap_low_mappings(void) int nx_enabled; -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; +pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); EXPORT_SYMBOL_GPL(__supported_pte_mask); #ifdef CONFIG_X86_PAE @@ -796,8 +791,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* Enable PGE if available */ if (cpu_has_pge) { set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; + __supported_pte_mask |= _PAGE_GLOBAL; } /* -- cgit v1.2.3 From f361a450bf1ad14e2b003217dbf3958638631265 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 10 Jul 2008 20:38:26 -0700 Subject: x86: introduce max_low_pfn_mapped for 64-bit when more than 4g memory is installed, don't map the big hole below 4g. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/mm/init_32.c') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b5a0fd5f4c5..029e8cffca9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; +unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -- cgit v1.2.3
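As a closing illustration of the find_early_table_space() arithmetic used by the init_memory_mapping() patches above, a simplified stand-alone calculation: non-PAE constants are assumed, and only the pte and fixmap terms are kept (the kernel also adds pud/pmd terms).

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PMD_SHIFT	22			/* non-PAE: 4MB large pages */
#define PMD_SIZE	(1UL << PMD_SHIFT)

int main(void)
{
	unsigned long end = 896UL << 20;	/* map 896MB of lowmem */
	unsigned long tables = 0, ptes;
	int use_pse = 1;			/* assume the CPU has PSE */

	if (use_pse) {
		/* only the unaligned head/tail need 4K pte pages */
		unsigned long extra = end - ((end >> PMD_SHIFT) << PMD_SHIFT);
		extra += PMD_SIZE;
		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
	} else {
		/* every 4K page needs its own pte */
		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
	}
	tables += PAGE_ALIGN(ptes * 4);		/* 4 bytes per pte, non-PAE */
	tables += PAGE_SIZE * 2;		/* fixmap, as in the v4 patch */

	printf("estimated early pagetable reservation: %lu KB\n", tables >> 10);
	return 0;
}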