From 8ee2debce32412118cf8c239e0026ace56ea1425 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:00 -0700 Subject: x86: Export k8 physical topology To eventually interleave emulated nodes over physical nodes, we need to know the physical topology of the machine without actually registering it. This does the k8 node setup in two parts: detection and registration. NUMA emulation can then used the physical topology detected to setup the address ranges of emulated nodes accordingly. If emulation isn't used, the k8 nodes are registered as normal. Two formals are added to the x86 NUMA setup functions: `acpi' and `k8'. These represent whether ACPI or K8 NUMA has been detected; both cannot be true at the same time. This specifies to the NUMA emulation code whether an underlying physical NUMA topology exists and which interface to use. This patch deals solely with separating the k8 setup path into Northbridge detection and registration steps and leaves the ACPI changes for a subsequent patch. The `acpi' formal is added here, however, to avoid touching all the header files again in the next patch. This approach also ensures emulated nodes will not span physical nodes so the true memory latency is not misrepresented. k8_get_nodes() may now be used to export the k8 physical topology of the machine for NUMA emulation. Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b..fda0032c25c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -106,6 +106,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include #endif @@ -691,6 +692,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { void __init setup_arch(char **cmdline_p) { + int acpi = 0; + int k8 = 0; + #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); @@ -937,7 +941,11 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif - initmem_init(0, max_pfn); +#ifdef CONFIG_K8_NUMA + k8 = !k8_numa_init(0, max_pfn); +#endif + + initmem_init(0, max_pfn, acpi, k8); #ifdef CONFIG_ACPI_SLEEP /* -- cgit v1.2.3 From 8716273caef7f55f39fe4fc6c69c5f9f197f41f1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 25 Sep 2009 15:20:04 -0700 Subject: x86: Export srat physical topology This is the counterpart to "x86: export k8 physical topology" for SRAT. It is not as invasive because the acpi code already seperates node setup into detection and registration steps, with the exception of registering e820 active regions in acpi_numa_memory_affinity_init(). This is now moved to acpi_scan_nodes() if NUMA emulation is disabled or deferred. acpi_numa_init() now returns a value which specifies whether an underlying SRAT was located. If so, that topology can be used by the emulation code to interleave emulated nodes over physical nodes or to register the nodes for ACPI. acpi_get_nodes() may now be used to export the srat physical topology of the machine for NUMA emulation. Signed-off-by: David Rientjes Cc: Andreas Herrmann Cc: Yinghai Lu Cc: Balbir Singh Cc: Ankita Garg Cc: Len Brown LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fda0032c25c..f8914198270 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -938,11 +938,12 @@ void __init setup_arch(char **cmdline_p) /* * Parse SRAT to discover nodes. */ - acpi_numa_init(); + acpi = acpi_numa_init(); #endif #ifdef CONFIG_K8_NUMA - k8 = !k8_numa_init(0, max_pfn); + if (!acpi) + k8 = !k8_numa_init(0, max_pfn); #endif initmem_init(0, max_pfn, acpi, k8); -- cgit v1.2.3 From b9af7c0d44b8bb71e3af5e94688d076414aa8c87 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 14 Oct 2009 14:46:55 -0700 Subject: x86-64: preserve large page mapping for 1st 2MB kernel txt with CONFIG_DEBUG_RODATA In the first 2MB, kernel text is co-located with kernel static page tables setup by head_64.S. CONFIG_DEBUG_RODATA chops this 2MB large page mapping to small 4KB pages as we mark the kernel text as RO, leaving the static page tables as RW. With CONFIG_DEBUG_RODATA disabled, OLTP run on NHM-EP shows 1% improvement with 2% reduction in system time and 1% improvement in iowait idle time. To recover this, move the kernel static page tables to .data section, so that we don't have to break the first 2MB of kernel text to small pages with CONFIG_DEBUG_RODATA. Signed-off-by: Suresh Siddha LKML-Reference: <20091014220254.063193621@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd..b55ee4ff509 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) .quad x86_64_start_kernel ENTRY(initial_gs) .quad INIT_PER_CPU_VAR(irq_stack_union) - __FINITDATA ENTRY(stack_start) .quad init_thread_union+THREAD_SIZE-8 .word 0 + __FINITDATA bad_address: jmp bad_address @@ -340,6 +340,7 @@ ENTRY(name) i = i + 1 ; \ .endr + .data /* * This default setting generates an ident mapping at address 0x100000 * and a mapping for the kernel that precisely maps virtual address -- cgit v1.2.3 From 74e081797bd9d2a7d8005fe519e719df343a2ba8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 14 Oct 2009 14:46:56 -0700 Subject: x86-64: align RODATA kernel section to 2MB with CONFIG_DEBUG_RODATA CONFIG_DEBUG_RODATA chops the large pages spanning boundaries of kernel text/rodata/data to small 4KB pages as they are mapped with different attributes (text as RO, RODATA as RO and NX etc). On x86_64, preserve the large page mappings for kernel text/rodata/data boundaries when CONFIG_DEBUG_RODATA is enabled. This is done by allowing the RODATA section to be hugepage aligned and having same RWX attributes for the 2MB page boundaries Extra Memory pages padding the sections will be freed during the end of the boot and the kernel identity mappings will have different RWX permissions compared to the kernel text mappings. Kernel identity mappings to these physical pages will be mapped with smaller pages but large page mappings are still retained for kernel text,rodata,data mappings. Signed-off-by: Suresh Siddha LKML-Reference: <20091014220254.190119924@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux.lds.S | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9f..14763790e41 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,6 +41,21 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) + +#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); + +#define X64_ALIGN_DEBUG_RODATA_END \ + . = ALIGN(HPAGE_SIZE); \ + __end_rodata_hpage_align = .; + +#else + +#define X64_ALIGN_DEBUG_RODATA_BEGIN +#define X64_ALIGN_DEBUG_RODATA_END + +#endif + PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ @@ -90,7 +105,9 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 + X64_ALIGN_DEBUG_RODATA_BEGIN RO_DATA(PAGE_SIZE) + X64_ALIGN_DEBUG_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { -- cgit v1.2.3 From d6cc1c3af760c1d3f6b42f6e52b08718a6207cf1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Oct 2009 06:12:04 -0700 Subject: x86-64: add comment for RODATA large page retainment Add a comment explaining why RODATA is aligned to 2 MB. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux.lds.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 14763790e41..fd2dabec1df 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -42,7 +42,18 @@ jiffies_64 = jiffies; #endif #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) - +/* + * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA + * we retain large page mappings for boundaries spanning kernel text, rodata + * and data sections. + * + * However, kernel identity mappings will have different RWX permissions + * to the pages mapping to text and to the pages padding (which are freed) the + * text section. Hence kernel identity mappings will be broken to smaller + * pages. For 64-bit, kernel text and kernel identity mappings are different, + * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, + * as well as retain 2MB large page mappings for kernel text. + */ #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); #define X64_ALIGN_DEBUG_RODATA_END \ -- cgit v1.2.3 From 55ca3cc1746335bb6ef1d3894ddb6d0c729b3518 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 28 Oct 2009 18:46:57 -0800 Subject: x86_64, ftrace: Make ftrace use kernel identity mapping to modify code On x86_64, kernel text mappings are mapped read-only with CONFIG_DEBUG_RODATA. So use the kernel identity mapping instead of the kernel text mapping to modify the kernel text. Signed-off-by: Suresh Siddha Acked-by: Steven Rostedt Tested-by: Steven Rostedt LKML-Reference: <20091029024821.080941108@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e165..944e9820b4b 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -187,9 +187,26 @@ static void wait_for_nmi(void) nmi_wait_count++; } +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + static int do_ftrace_mod_code(unsigned long ip, void *new_code) { + /* + * On x86_64, kernel text mappings are mapped read-only with + * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead + * of the kernel text mapping to modify the kernel text. + * + * For 32bit kernels, these mappings are same and we can use + * kernel identity mapping to modify code. + */ + if (within(ip, (unsigned long)_text, (unsigned long)_etext)) + ip = (unsigned long)__va(__pa(ip)); + mod_code_ip = (void *)ip; mod_code_newcode = new_code; -- cgit v1.2.3 From 196cf0d67acad70ebb2572da489d5cc7066cdd05 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 10 Nov 2009 18:27:23 -0800 Subject: x86: Make sure wakeup trampoline code is below 1MB Instead of using bootmem, try find_e820_area()/reserve_early(), and call acpi_reserve_memory() early, to allocate the wakeup trampoline code area below 1M. This is more reliable, and it also removes a dependency on bootmem. -v2: change function name to acpi_reserve_wakeup_memory(), as suggested by Rafael. Signed-off-by: Yinghai Lu Acked-by: H. Peter Anvin Acked-by: Rafael J. Wysocki Cc: pm list Cc: Len Brown Cc: Linus Torvalds LKML-Reference: <4AFA210B.3020207@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 15 +++++++++------ arch/x86/kernel/setup.c | 13 +++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638ba43..4a411450dfa 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -119,29 +119,32 @@ void acpi_restore_state_mem(void) /** - * acpi_reserve_bootmem - do _very_ early ACPI initialisation + * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation * * We allocate a page from the first 1MB of memory for the wakeup * routine for when we come back from a sleep state. The * runtime allocator allows specification of <16MB pages, but not * <1MB pages. */ -void __init acpi_reserve_bootmem(void) +void __init acpi_reserve_wakeup_memory(void) { + unsigned long mem; + if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n"); return; } - acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); + mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); - if (!acpi_realmode) { + if (mem == -1L) { printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); return; } - - acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); + acpi_realmode = (unsigned long) phys_to_virt(mem); + acpi_wakeup_address = mem; + reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f8914198270..0a6e94ab833 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -897,6 +897,13 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); +#ifdef CONFIG_ACPI_SLEEP + /* + * Reserve low memory region for sleep support. + * even before init_memory_mapping + */ + acpi_reserve_wakeup_memory(); +#endif init_gbpages(); /* max_pfn_mapped is updated here */ @@ -948,12 +955,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); -#ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. - */ - acpi_reserve_bootmem(); -#endif /* * Find and reserve possible boot-time SMP configuration: */ -- cgit v1.2.3 From 8a50e5135af0c243e117e94e27feb8d149c879b4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:13 -0800 Subject: x86-32: Use symbolic constants, safer CPUID when enabling EFER.NX Use symbolic constants rather than hard-coded values when setting EFER.NX in head_32.S, and do a more rigorous test for the validity of the response when probing for the extended CPUID range. Signed-off-by: H. Peter Anvin LKML-Reference: <1258154897-6770-2-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/kernel/head_32.S | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278481b..7fd318bac59 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include /* Physical address */ @@ -297,25 +299,27 @@ ENTRY(startup_32_smp) orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f + testb $X86_CR4_PAE, %al # check if PAE is enabled + jz 6f /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid - cmpl $0x80000000, %eax - jbe 6f + /* Value must be in the range 0x80000001 to 0x8000ffff */ + subl $0x80000001, %eax + cmpl $(0x8000ffff-0x80000001), %eax + ja 6f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ - btl $20, %edx + btl $(X86_FEATURE_NX & 31), %edx jnc 6f /* Setup EFER (Extended Feature Enable Register) */ - movl $0xc0000080, %ecx + movl $MSR_EFER, %ecx rdmsr - btsl $11, %eax + btsl $_EFER_NX, %eax /* Make changes effective */ wrmsr -- cgit v1.2.3 From a7c4c0d934c6cbc58de262d090d4a715445453f0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:14 -0800 Subject: x86, sleep: Always save the value of EFER Always save the value of EFER, regardless of the state of NX. Since EFER may not actually exist, use rdmsr_safe() to do so. v2: check the return value from rdmsr_safe() instead of relying on the output values being unchanged on error. Signed-off-by: H. Peter Anvin Acked-by: Rafael J. Wysocki Cc: Pavel Machek Cc: Nigel Cunningham LKML-Reference: <1258154897-6770-3-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/kernel/acpi/sleep.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 4a411450dfa..82e508677b9 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -78,12 +78,9 @@ int acpi_save_state_mem(void) #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); - header->pmode_efer_low = nx_enabled; - if (header->pmode_efer_low & 1) { - /* This is strange, why not save efer, always? */ - rdmsr(MSR_EFER, header->pmode_efer_low, - header->pmode_efer_high); - } + if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, + &header->pmode_efer_high)) + header->pmode_efer_low = header->pmode_efer_high = 0; #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); -- cgit v1.2.3 From 583140afb989f24d115e80be5c91e503b58ccfc0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:15 -0800 Subject: x86, pageattr: Make set_memory_(x|nx) aware of NX support Make set_memory_x/set_memory_nx directly aware of if NX is supported in the system or not, rather than requiring that every caller assesses that support independently. Signed-off-by: H. Peter Anvin Cc: Huang Ying Cc: Venkatesh Pallipadi Cc: Suresh Siddha Cc: Tejun Heo Cc: Tim Starling Cc: Hannes Eder LKML-Reference: <1258154897-6770-4-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/kernel/machine_kexec_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d0013..03657e784fd 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -157,8 +157,7 @@ int machine_kexec_prepare(struct kimage *image) { int error; - if (nx_enabled) - set_pages_x(image->control_code_page, 1); + set_pages_x(image->control_code_page, 1); error = machine_kexec_alloc_page_tables(image); if (error) return error; @@ -172,8 +171,7 @@ int machine_kexec_prepare(struct kimage *image) */ void machine_kexec_cleanup(struct kimage *image) { - if (nx_enabled) - set_pages_nx(image->control_code_page, 1); + set_pages_nx(image->control_code_page, 1); machine_kexec_free_page_tables(image); } -- cgit v1.2.3 From 4763ed4d45522b876c97e1f7f4b659d211f75571 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 13 Nov 2009 15:28:16 -0800 Subject: x86, mm: Clean up and simplify NX enablement The 32- and 64-bit code used very different mechanisms for enabling NX, but even the 32-bit code was enabling NX in head_32.S if it is available. Furthermore, we had a bewildering collection of tests for the available of NX. This patch: a) merges the 32-bit set_nx() and the 64-bit check_efer() function into a single x86_configure_nx() function. EFER control is left to the head code. b) eliminates the nx_enabled variable entirely. Things that need to test for NX enablement can verify __supported_pte_mask directly, and cpu_has_nx gives the supported status of NX. Signed-off-by: H. Peter Anvin Cc: Tejun Heo Cc: Brian Gerst Cc: Yinghai Lu Cc: Pekka Enberg Cc: Vegard Nossum Cc: Jeremy Fitzhardinge Cc: Chris Wright LKML-Reference: <1258154897-6770-5-git-send-email-hpa@zytor.com> Acked-by: Kees Cook --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/setup.c | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a56..18346da8c59 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void) wrmsrl(MSR_KERNEL_GS_BASE, 0); barrier(); - check_efer(); + x86_configure_nx(); if (cpu != 0) enable_x2apic(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0a6e94ab833..23b7f46bf84 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -787,21 +787,17 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -#ifdef CONFIG_X86_64 /* * Must call this twice: Once just to detect whether hardware doesn't * support NX (so that the early EHCI debug console setup can safely * call set_fixmap(), and then again after parsing early parameters to * honor the respective command line option. */ - check_efer(); -#endif + x86_configure_nx(); parse_early_param(); -#ifdef CONFIG_X86_64 - check_efer(); -#endif + x86_configure_nx(); /* Must be before kernel pagetables are setup */ vmi_activate(); -- cgit v1.2.3 From 4b0f3b81eb33ef18283aa71440cccfede1753ae0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Nov 2009 15:28:17 -0800 Subject: x86, mm: Report state of NX protections during boot It is possible for x86_64 systems to lack the NX bit either due to the hardware lacking support or the BIOS having turned off the CPU capability, so NX status should be reported. Additionally, anyone booting NX-capable CPUs in 32bit mode without PAE will lack NX functionality, so this change provides feedback for that case as well. Signed-off-by: Kees Cook Signed-off-by: H. Peter Anvin LKML-Reference: <1258154897-6770-6-git-send-email-hpa@zytor.com> --- arch/x86/kernel/setup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 23b7f46bf84..d2043a00abc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -788,16 +788,17 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = command_line; /* - * Must call this twice: Once just to detect whether hardware doesn't - * support NX (so that the early EHCI debug console setup can safely - * call set_fixmap(), and then again after parsing early parameters to - * honor the respective command line option. + * x86_configure_nx() is called before parse_early_param() to detect + * whether hardware doesn't support NX (so that the early EHCI debug + * console setup can safely call set_fixmap()). It may then be called + * again from within noexec_setup() during parsing early parameters + * to honor the respective command line option. */ x86_configure_nx(); parse_early_param(); - x86_configure_nx(); + x86_report_nx(); /* Must be before kernel pagetables are setup */ vmi_activate(); -- cgit v1.2.3 From 508d85c2c6bc8cba53d2a54d9a306ad64a0a80bf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Nov 2009 23:04:56 -0800 Subject: x86: When cleaning MTRRs, do not fold WP into UC The current MTRR code treats WP as a form of UC. This really isn't desirable behaviour, except possibly in the case of severe MTRR shortage. Disable this, to allow legitimate uses of WP to remain unmolested. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin Cc: Linus Torvalds --- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aa..6e49f6f91f3 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -689,8 +689,6 @@ static int __init mtrr_need_cleanup(void) continue; if (!size) type = MTRR_NUM_TYPES; - if (type == MTRR_TYPE_WRPROT) - type = MTRR_TYPE_UNCACHABLE; num[type]++; } -- cgit v1.2.3 From 350f8f5631922c7848ec4b530c111cb8c2ff7caa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:54:40 +0000 Subject: x86: Eliminate redundant/contradicting cache line size config options Rather than having X86_L1_CACHE_BYTES and X86_L1_CACHE_SHIFT (with inconsistent defaults), just having the latter suffices as the former can be easily calculated from it. To be consistent, also change X86_INTERNODE_CACHE_BYTES to X86_INTERNODE_CACHE_SHIFT, and set it to 7 (128 bytes) for NUMA to account for last level cache line size (which here matters more than L1 cache line size). Finally, make sure the default value for X86_L1_CACHE_SHIFT, when X86_GENERIC is selected, is being seen before that for the individual CPU model options (other than on x86-64, where GENERIC_CPU is part of the choice construct, X86_GENERIC is a separate option on ix86). Signed-off-by: Jan Beulich Acked-by: Ravikiran Thirumalai Acked-by: Nick Piggin LKML-Reference: <4AFD5710020000780001F8F0@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fd2dabec1df..eeb4f5fbd86 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -135,13 +135,13 @@ SECTIONS PAGE_ALIGNED_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA CONSTRUCTORS /* rarely changed data like cpu maps */ - READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) + READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) /* End of data section */ _edata = .; @@ -165,12 +165,12 @@ SECTIONS *(.vsyscall_0) } :user - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(L1_CACHE_BYTES); .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(L1_CACHE_BYTES); .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) } @@ -194,7 +194,7 @@ SECTIONS } vgetcpu_mode = VVIRT(.vgetcpu_mode); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(L1_CACHE_BYTES); .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } -- cgit v1.2.3 From 44280733e71ad15377735b42d8538c109c94d7e3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Nov 2009 17:18:49 -0800 Subject: x86: Change crash kernel to reserve via reserve_early() use find_e820_area()/reserve_early() instead. -v2: address Eric's request, to restore original semantics. will fail, if the provided address can not be used. Signed-off-by: Yinghai Lu Acked-by: Eric W. Biederman LKML-Reference: <4B09E2F9.7040403@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 57 +++++++++++++------------------------------------ 1 file changed, 15 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d2043a00abc..e3eae5965e4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -487,42 +487,11 @@ static void __init reserve_early_setup_data(void) #ifdef CONFIG_KEXEC -/** - * Reserve @size bytes of crashkernel memory at any suitable offset. - * - * @size: Size of the crashkernel memory to reserve. - * Returns the base address on success, and -1ULL on failure. - */ -static -unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) -{ - const unsigned long long alignment = 16<<20; /* 16M */ - unsigned long long start = 0LL; - - while (1) { - int ret; - - start = find_e820_area(start, ULONG_MAX, size, alignment); - if (start == -1ULL) - return start; - - /* try to reserve it */ - ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); - if (ret >= 0) - return start; - - start += alignment; - } -} - static inline unsigned long long get_total_mem(void) { unsigned long long total; - total = max_low_pfn - min_low_pfn; -#ifdef CONFIG_HIGHMEM - total += highend_pfn - highstart_pfn; -#endif + total = max_pfn - min_low_pfn; return total << PAGE_SHIFT; } @@ -542,21 +511,25 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { - crash_base = find_and_reserve_crashkernel(crash_size); + const unsigned long long alignment = 16<<20; /* 16M */ + + crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, + alignment); if (crash_base == -1ULL) { - pr_info("crashkernel reservation failed. " - "No suitable area found.\n"); + pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } } else { - ret = reserve_bootmem_generic(crash_base, crash_size, - BOOTMEM_EXCLUSIVE); - if (ret < 0) { - pr_info("crashkernel reservation failed - " - "memory is in use\n"); + unsigned long long start; + + start = find_e820_area(crash_base, ULONG_MAX, crash_size, + 1<<20); + if (start != crash_base) { + pr_info("crashkernel reservation failed - memory is in use.\n"); return; } } + reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", @@ -927,6 +900,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); + reserve_crashkernel(); + vsmp_init(); io_delay_init(); @@ -957,8 +932,6 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); - reserve_crashkernel(); - #ifdef CONFIG_X86_64 /* * dma32_reserve_bootmem() allocates bootmem which may conflict -- cgit v1.2.3 From d9c2d5ac6af87b4491bff107113aaf16f6c2b2d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86, numa: Use near(er) online node instead of roundrobin for NUMA CPU to node mapping is set via the following sequence: 1. numa_init_array(): Set up roundrobin from cpu to online node 2. init_cpu_to_node(): Set that according to apicid_to_node[] according to srat only handle the node that is online, and leave other cpu on node without ram (aka not online) to still roundrobin. 3. later call srat_detect_node for Intel/AMD, will use first_online node or nearby node. Problem is that setup_per_cpu_areas() is not called between 2 and 3, the per_cpu for cpu on node with ram is on different node, and could put that on node with two hops away. So try to optimize this and add find_near_online_node() and call init_cpu_to_node(). Signed-off-by: Yinghai Lu Cc: Tejun Heo Cc: Linus Torvalds Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Rusty Russell Cc: David Rientjes Cc: Andrew Morton LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e..c900b73f922 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) + if (node == NUMA_NO_NODE) node = first_node(node_online_map); + else if (!node_online(node)) { + /* reuse the value from init_cpu_to_node() */ + node = cpu_to_node(cpu); + } numa_set_node(cpu, node); printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); -- cgit v1.2.3 From e38e2af1c57c3eb5211331a5b4fcaae0c4a2a918 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 19 Nov 2009 17:12:43 -0600 Subject: x86: SGI UV: Fix BAU initialization A memory mapped register that affects the SGI UV Broadcast Assist Unit's interrupt handling may sometimes be unintialized. Remove the condition on its initialization, as that condition can be randomly satisfied by a hardware reset. Signed-off-by: Cliff Wickman Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 503c1f2e883..af21e555690 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -819,10 +819,8 @@ static int __init uv_init_blade(int blade) */ apicid = blade_to_first_apicid(blade); pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); - if ((pa & 0xff) != UV_BAU_MESSAGE) { - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, ((apicid << 32) | UV_BAU_MESSAGE)); - } return 0; } -- cgit v1.2.3 From fd12a0d69aee6d90fa9b9890db24368a897f8423 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 19 Nov 2009 14:23:41 -0600 Subject: x86: UV SGI: Don't track GRU space in PAT GRU space is always mapped as WB in the page table. There is no need to track the mappings in the PAT. This also eliminates the "freeing invalid memtype" messages when the GRU space is unmapped. Signed-off-by: Jack Steiner LKML-Reference: <20091119202341.GA4420@sgi.com> [ v2: fix build failure ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 19 ++++++++++++++++++- arch/x86/kernel/x86_init.c | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886a6b5..2477c9f8809 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -30,10 +30,22 @@ #include #include #include +#include DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; +static u64 gru_start_paddr, gru_end_paddr; + +static int is_GRU_range(u64 start, u64 end) +{ + return start >= gru_start_paddr && end < gru_end_paddr; +} + +static int uv_is_untracked_pat_range(u64 start, u64 end) +{ + return is_ISA_range(start, end) || is_GRU_range(start, end); +} static int early_get_nodeid(void) { @@ -49,6 +61,7 @@ static int early_get_nodeid(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { + x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; if (!strcmp(oem_table_id, "UVL")) uv_system_type = UV_LEGACY_APIC; else if (!strcmp(oem_table_id, "UVX")) @@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode) int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); - if (gru.s.enable) + if (gru.s.enable) { map_high("GRU", gru.s.base, shift, max_pnode, map_wb); + gru_start_paddr = ((u64)gru.s.base << shift); + gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); + + } } static __init void map_mmr_high(int max_pnode) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2e..bcc749ef62d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include void __cpuinit x86_init_noop(void) { } @@ -69,6 +70,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { }; struct x86_platform_ops x86_platform = { + .is_untracked_pat_range = default_is_untracked_pat_range, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, -- cgit v1.2.3 From eb41c8be89dbe079f49202774e04a79ccac48a09 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Nov 2009 14:46:07 -0800 Subject: x86, platform: Change is_untracked_pat_range() to bool; cleanup init - Change is_untracked_pat_range() to return bool. - Clean up the initialization of is_untracked_pat_range() -- by default, we simply point it at is_ISA_range() directly. - Move is_untracked_pat_range to the end of struct x86_platform, since it is the newest field. Signed-off-by: H. Peter Anvin Acked-by: Thomas Gleixner Cc: Jack Steiner LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- arch/x86/kernel/x86_init.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2477c9f8809..597a47b1cec 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -37,12 +37,12 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; -static int is_GRU_range(u64 start, u64 end) +static inline bool is_GRU_range(u64 start, u64 end) { return start >= gru_start_paddr && end < gru_end_paddr; } -static int uv_is_untracked_pat_range(u64 start, u64 end) +static bool uv_is_untracked_pat_range(u64 start, u64 end) { return is_ISA_range(start, end) || is_GRU_range(start, end); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index bcc749ef62d..861b8b54e17 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -70,8 +70,8 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { }; struct x86_platform_ops x86_platform = { - .is_untracked_pat_range = default_is_untracked_pat_range, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, + .is_untracked_pat_range = is_ISA_range, }; -- cgit v1.2.3 From b24c2a925a9837cccf54d50aeac22ba0cbc15455 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 02:48:18 -0800 Subject: x86: Move find_smp_config() earlier and avoid bootmem usage Move the find_smp_config() call to before bootmem is initialized. Use reserve_early() instead of reserve_bootmem() in it. This simplifies the code, we only need to call find_smp_config() once and can remove the now unneeded reserve parameter from x86_init_mpparse::find_smp_config. We thus also reduce x86's dependency on bootmem allocations. Signed-off-by: Yinghai Lu LKML-Reference: <4B0BB9F2.70907@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/numaq_32.c | 5 ----- arch/x86/kernel/mpparse.c | 44 +++++++++++------------------------------ arch/x86/kernel/setup.c | 10 +++++----- arch/x86/kernel/visws_quirks.c | 2 +- 4 files changed, 18 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b850..9c0629ceb52 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -263,11 +263,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) static __init void early_check_numaq(void) { - /* - * Find possible boot-time SMP configuration: - */ - early_find_smp_config(); - /* * get boot-time SMP configuration: */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5be95ef4ffe..35a57c963df 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early) */ } -static void __init smp_reserve_bootmem(struct mpf_intel *mpf) +static void __init smp_reserve_memory(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); -#ifdef CONFIG_X86_32 - /* - * We cannot access to MPC table to compute table size yet, - * as only few megabytes from the bottom is mapped now. - * PC-9800's MPC table places on the very last of physical - * memory; so that simply reserving PAGE_SIZE from mpf->physptr - * yields BUG() in reserve_bootmem. - * also need to make sure physptr is below than max_low_pfn - * we don't need reserve the area above max_low_pfn - */ - unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->physptr < end) { - if (mpf->physptr + size > end) - size = end - mpf->physptr; - reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); - } -#else - reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); -#endif + reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); } -static int __init smp_scan_config(unsigned long base, unsigned long length, - unsigned reserve) +static int __init smp_scan_config(unsigned long base, unsigned long length) { unsigned int *bp = phys_to_virt(base); struct mpf_intel *mpf; + unsigned long mem; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); @@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", mpf, (u64)virt_to_phys(mpf)); - if (!reserve) - return 1; - reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), - BOOTMEM_DEFAULT); + mem = virt_to_phys(mpf); + reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); if (mpf->physptr) - smp_reserve_bootmem(mpf); + smp_reserve_memory(mpf); return 1; } @@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 0; } -void __init default_find_smp_config(unsigned int reserve) +void __init default_find_smp_config(void) { unsigned int address; @@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve) * 2) Scan the top 1K of base RAM * 3) Scan the 64K of bios */ - if (smp_scan_config(0x0, 0x400, reserve) || - smp_scan_config(639 * 0x400, 0x400, reserve) || - smp_scan_config(0xF0000, 0x10000, reserve)) + if (smp_scan_config(0x0, 0x400) || + smp_scan_config(639 * 0x400, 0x400) || + smp_scan_config(0xF0000, 0x10000)) return; /* * If it is an SMP machine we should know now, unless the @@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve) address = get_bios_ebda(); if (address) - smp_scan_config(address, 0x400, reserve); + smp_scan_config(address, 0x400); } #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e3eae5965e4..cdb6a8a506d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -913,6 +913,11 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); + #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. @@ -927,11 +932,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); - #ifdef CONFIG_X86_64 /* * dma32_reserve_bootmem() allocates bootmem which may conflict diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b1..1498efa964b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) apic_version[m->apicid] = ver; } -static void __init visws_find_smp_config(unsigned int reserve) +static void __init visws_find_smp_config(void) { struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); -- cgit v1.2.3 From 5bf65b9ba67226eae9ffc398a0369fc4da35c259 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 02:46:59 -0800 Subject: x86, mtrr: Fix sorting of mtrr after subtracting In some cases we can coalesce MTRR entries after cleanup; this may allow us to have more entries. As such, introduce clean_sort_range to to sort and coaelsce the MTRR entries. Signed-off-by: Yinghai Lu LKML-Reference: <4B0BB9A3.5020908@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/cleanup.c | 49 +++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 6e49f6f91f3..6987af786c0 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } +static int __init clean_sort_range(struct res_range *range, int az) +{ + int i, j, k = az - 1, nr_range = 0; + + for (i = 0; i < k; i++) { + if (range[i].end) + continue; + for (j = k; j > i; j--) { + if (range[j].end) { + k = j; + break; + } + } + if (j == i) + break; + range[i].start = range[k].start; + range[i].end = range[k].end; + range[k].start = 0; + range[k].end = 0; + k--; + } + /* count it */ + for (i = 0; i < az; i++) { + if (!range[i].end) { + nr_range = i; + break; + } + } + + /* sort them */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + + return nr_range; +} + #define BIOS_BUG_MSG KERN_WARNING \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } if (debug_print) { printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < nr_range; i++) + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } } /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + nr_range = clean_sort_range(range, RANGE_NUM); if (debug_print) { printk(KERN_DEBUG "After sorting\n"); for (i = 0; i < nr_range; i++) -- cgit v1.2.3 From ccef086454d4c97e7b722e9303390207d681cb4c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 30 Nov 2009 21:33:51 -0800 Subject: x86, mm: Correct the implementation of is_untracked_pat_range() The semantics the PAT code expect of is_untracked_pat_range() is "is this range completely contained inside the untracked region." This means that checkin 8a27138924f64d2f30c1022f909f74480046bc3f was technically wrong, because the implementation needlessly confusing. The sane interface is for it to take a semiclosed range like just about everything else (as evidenced by the sheer number of "- 1"'s removed by that patch) so change the actual implementation to match. Reported-by: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Jack Steiner Signed-off-by: H. Peter Anvin LKML-Reference: <20091119202341.GA4420@sgi.com> --- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 597a47b1cec..1e09417c992 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -39,7 +39,7 @@ static u64 gru_start_paddr, gru_end_paddr; static inline bool is_GRU_range(u64 start, u64 end) { - return start >= gru_start_paddr && end < gru_end_paddr; + return start >= gru_start_paddr && end <= gru_end_paddr; } static bool uv_is_untracked_pat_range(u64 start, u64 end) -- cgit v1.2.3