diff options
Diffstat (limited to 'arch/x86')
95 files changed, 1098 insertions, 795 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a59dbb2824..87a693cf2bb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -117,6 +117,9 @@ config ARCH_HAS_CPU_RELAX config HAVE_SETUP_PER_CPU_AREA def_bool X86_64 || (X86_SMP && !X86_VOYAGER) +config HAVE_CPUMASK_OF_CPU_MAP + def_bool X86_64_SMP + config ARCH_HIBERNATION_POSSIBLE def_bool y depends on !SMP || !X86_VOYAGER @@ -903,6 +906,15 @@ config X86_64_ACPI_NUMA help Enable ACPI SRAT based node topology detection. +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on X86_64_ACPI_NUMA + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 31348d054fc..90943f83e84 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/a20.c - * * Enable A20 gate (return -1 on failure) */ diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index c117c7fb859..7aa6033001f 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -12,8 +12,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/apm.c - * * Get APM BIOS information */ diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 8dcc8dc7db8..878e4b9940d 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/bitops.h - * * Very simple bitops for the boot code. */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 09578070bfb..a34b9982c7c 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/boot.h - * * Header file for the real-mode kernel code */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 680408a0f46..a1d35634bce 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cmdline.c - * * Simple command-line parser for early boot. */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 036e635f18a..ba7736cf2ec 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -130,7 +130,7 @@ relocated: /* * Setup the stack for the decompressor */ - leal stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. @@ -142,8 +142,8 @@ relocated: pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data - leal _end(%ebx), %eax - pushl %eax # end of the image as third argument + leal boot_heap(%ebx), %eax + pushl %eax # heap area as third argument pushl %esi # real mode pointer as second arg call decompress_kernel addl $20, %esp @@ -181,7 +181,10 @@ relocated: jmp *%ebp .bss +/* Stack and heap for uncompression */ .balign 4 -stack: - .fill 4096, 1, 0 -stack_end: +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e8657b98c90..d8819efac81 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -28,6 +28,7 @@ #include <asm/segment.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/boot.h> #include <asm/msr.h> #include <asm/asm-offsets.h> @@ -62,7 +63,7 @@ startup_32: subl $1b, %ebp /* setup a stack and make sure cpu supports long mode. */ - movl $user_stack_end, %eax + movl $boot_stack_end, %eax addl %ebp, %eax movl %eax, %esp @@ -243,9 +244,9 @@ ENTRY(startup_64) /* Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _end(%rip), %r8 - leaq _end(%rbx), %r9 - movq $_end /* - $startup_32 */, %rcx + leaq _end_before_pgt(%rip), %r8 + leaq _end_before_pgt(%rbx), %r9 + movq $_end_before_pgt /* - $startup_32 */, %rcx 1: subq $8, %r8 subq $8, %r9 movq 0(%r8), %rax @@ -267,14 +268,14 @@ relocated: */ xorq %rax, %rax leaq _edata(%rbx), %rdi - leaq _end(%rbx), %rcx + leaq _end_before_pgt(%rbx), %rcx subq %rdi, %rcx cld rep stosb /* Setup the stack */ - leaq user_stack_end(%rip), %rsp + leaq boot_stack_end(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -285,7 +286,7 @@ relocated: */ pushq %rsi # Save the real mode argument movq %rsi, %rdi # real mode address - leaq _heap(%rip), %rsi # _heap + leaq boot_heap(%rip), %rsi # malloc area for uncompression leaq input_data(%rip), %rdx # input_data movl input_len(%rip), %eax movq %rax, %rcx # input_len @@ -310,9 +311,12 @@ gdt: .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ gdt_end: - .bss -/* Stack for uncompression */ - .balign 4 -user_stack: - .fill 4096,4,0 -user_stack_end: + +.bss +/* Stack and heap for uncompression */ +.balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dad4e699f5a..90456cee47c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -217,12 +217,6 @@ static void putstr(const char *); static memptr free_mem_ptr; static memptr free_mem_end_ptr; -#ifdef CONFIG_X86_64 -#define HEAP_SIZE 0x7000 -#else -#define HEAP_SIZE 0x4000 -#endif - static char *vidmem; static int vidport; static int lines, cols; @@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ - free_mem_end_ptr = heap + HEAP_SIZE; + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; inbuf = input_data; /* Input buffer */ insize = input_len; inptr = 0; diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds index 7e5c7209f6c..bef1ac891bc 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux_64.lds @@ -39,10 +39,10 @@ SECTIONS *(.bss.*) *(COMMON) . = ALIGN(8); - _end = . ; + _end_before_pgt = . ; . = ALIGN(4096); pgtable = . ; . = . + 4096 * 6; - _heap = .; + _ebss = .; } } diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef127e56a3c..ef50c84e8b4 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/copy.S - * * Memory copy routines */ diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 2462c88689e..7804389ee00 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cpucheck.c - * * Check for obligatory CPU features and abort if the features are not * present. This code should be compilable as 16-, 32- or 64-bit * code, so be very careful with types and inline assembly. diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 8721dc46a0b..d84a48ece78 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/edd.c - * * Get EDD BIOS disk information */ diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 88d77761d01..8d60ee15dfd 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -1,7 +1,5 @@ #!/bin/sh # -# arch/i386/boot/install.sh -# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 7828da5cfd0..77569a4a3be 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/main.c - * * Main module for the real-mode kernel code */ diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 68222f2d4b6..911eaae5d69 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/mca.c - * * Get the MCA system description table */ diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index e77d89f9e8a..acad32eb429 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/memory.c - * * Memory detection code */ diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index a93cb8bded4..328956fdb59 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pm.c - * * Prepare the machine for transition to protected mode. */ diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index f5402d51f7c..ab049d40a88 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pmjump.S - * * The actual transition into protected mode */ diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 7e7e890699b..c1d00c0274c 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/printf.c - * * Oh, it's a waste of space, but oh-so-yummy for debugging. This * version of printf() does not include 64-bit support. "Live with * it." diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 481a2209778..f94b7a0c2ab 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/string.c - * * Very basic string functions */ diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index f3f14bd2637..0be77b39328 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/tty.c - * * Very simple screen I/O * XXX: Probably should add very simple serial I/O? */ diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index c61462f7d9a..2723d9b5ce4 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/version.c - * * Kernel version string */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 39e247e9617..49f26aaaebc 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-bios.c - * * Standard video BIOS modes * * We have two options for this; silent and scanned. diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 5d5a3f6e8b5..401ad998ad0 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vesa.c - * * VESA text modes */ diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 330d6589a2a..40ecb8d7688 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vga.c - * * Common all-VGA modes */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index c1c47ba069e..83598b23093 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.c - * * Select video mode */ diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index d69347f79e8..ee63f5d1446 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.h - * * Header file for the real-mode video probing code */ diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c index 6499e3239b4..433909d61e5 100644 --- a/arch/x86/boot/voyager.c +++ b/arch/x86/boot/voyager.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/voyager.c - * * Get the Voyager config information */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 7cede7a9e0d..f00afdf61e6 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -43,7 +43,6 @@ #include <asm/mman.h> #include <asm/types.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <asm/atomic.h> #include <asm/ia32.h> #include <asm/vgtod.h> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c3920ea8ac5..90e092d0af0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,13 +22,14 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o -obj-y += quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o -obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o +obj-y += bootflag.o e820_$(BITS).o +obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o +obj-y += alternative.o i8253.o pci-nommu.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-y += process.o obj-y += i387.o obj-y += ptrace.o obj-y += ds.o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d5..c2502eb9aa8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/cstate.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> * - Added _PDC for SMP C-states on Intel CPUs @@ -93,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; @@ -130,7 +128,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, cx->address); out: - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 324eb0cab19..de2d2e4ebad 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/processor.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> * - Added _PDC for platforms with Intel CPUs diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d999d7833bc..35b4f6a9c8e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -5,7 +5,6 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/bootmem.h> -#include <asm/semaphore.h> #include <asm/processor.h> #include <asm/i387.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index a962dcb9c40..e2d870de837 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -192,9 +192,9 @@ static void drv_read(struct drv_cmd *cmd) cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed(current, cmd->mask); + set_cpus_allowed_ptr(current, &cmd->mask); do_drv_read(cmd); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); } static void drv_write(struct drv_cmd *cmd) @@ -203,30 +203,30 @@ static void drv_write(struct drv_cmd *cmd) unsigned int i; for_each_cpu_mask(i, cmd->mask) { - set_cpus_allowed(current, cpumask_of_cpu(i)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return; } -static u32 get_cur_val(cpumask_t mask) +static u32 get_cur_val(const cpumask_t *mask) { struct acpi_processor_performance *perf; struct drv_cmd cmd; - if (unlikely(cpus_empty(mask))) + if (unlikely(cpus_empty(*mask))) return 0; - switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) { + switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(drv_data, first_cpu(mask))->acpi_data; + perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -234,7 +234,7 @@ static u32 get_cur_val(cpumask_t mask) return 0; } - cmd.mask = mask; + cmd.mask = *mask; drv_read(&cmd); @@ -271,7 +271,7 @@ static unsigned int get_measured_perf(unsigned int cpu) unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -329,7 +329,7 @@ static unsigned int get_measured_perf(unsigned int cpu) retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; put_cpu(); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); return retval; @@ -347,13 +347,13 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return 0; } - freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); dprintk("cur freq = %u\n", freq); return freq; } -static unsigned int check_freqs(cpumask_t mask, unsigned int freq, +static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; @@ -449,7 +449,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, drv_write(&cmd); if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { + if (!check_freqs(&cmd.mask, freqs.new, data)) { dprintk("acpi_cpufreq_target failed (%d)\n", policy->cpu); return -EAGAIN; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 14791ec55cf..199e4e05e5d 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) + if (!test_cpu_cap(c, X86_FEATURE_ACPI) || + !test_cpu_cap(c, X86_FEATURE_ACC)) return -ENODEV; ret = cpufreq_register_driver(&p4clockmod_driver); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c99d59d8ef2..46d4034d9f3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -478,12 +478,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -528,7 +528,7 @@ static int check_supported_cpu(unsigned int cpu) rc = 1; out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return rc; } @@ -1015,7 +1015,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i /* Driver entry point to switch to the target frequency */ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1030,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1085,7 +1085,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi ret = 0; err_out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return ret; } @@ -1104,7 +1104,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; - cpumask_t oldmask = CPU_MASK_ALL; + cpumask_t oldmask; int rc; if (!cpu_online(pol->cpu)) @@ -1145,7 +1145,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1164,7 +1164,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) fidvid_msr_init(); /* run on any CPU again */ - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); @@ -1205,7 +1205,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; err_out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); powernow_k8_cpu_exit_acpi(data); kfree(data); @@ -1242,10 +1242,11 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed(current, oldmask); + printk(KERN_ERR PFX + "limiting to CPU %d failed in powernowk8_get\n", cpu); + set_cpus_allowed_ptr(current, &oldmask); return 0; } @@ -1253,13 +1254,14 @@ static unsigned int powernowk8_get (unsigned int cpu) goto out; if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + khz = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); else khz = find_khz_freq_from_fid(data->currfid); out: - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); return khz; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 3031f119619..908dd347c67 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -315,7 +315,7 @@ static unsigned int get_cur_freq(unsigned int cpu) cpumask_t saved_mask; saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -333,7 +333,7 @@ static unsigned int get_cur_freq(unsigned int cpu) clock_freq = extract_clock(l, cpu, 1); } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return clock_freq; } @@ -487,7 +487,7 @@ static int centrino_target (struct cpufreq_policy *policy, else cpu_set(j, set_mask); - set_cpus_allowed(current, set_mask); + set_cpus_allowed_ptr(current, &set_mask); preempt_disable(); if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { dprintk("couldn't limit to CPUs in this domain\n"); @@ -555,7 +555,8 @@ static int centrino_target (struct cpufreq_policy *policy, if (!cpus_empty(covered_cpus)) { for_each_cpu_mask(j, covered_cpus) { - set_cpus_allowed(current, cpumask_of_cpu(j)); + set_cpus_allowed_ptr(current, + &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } } @@ -569,12 +570,12 @@ static int centrino_target (struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return 0; migrate_end: preempt_enable(); - set_cpus_allowed(current, saved_mask); + set_cpus_allowed_ptr(current, &saved_mask); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 14d68aa301e..1b50244b1fd 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -229,22 +229,22 @@ static unsigned int speedstep_detect_chipset (void) return 0; } -static unsigned int _speedstep_get(cpumask_t cpus) +static unsigned int _speedstep_get(const cpumask_t *cpus) { unsigned int speed; cpumask_t cpus_allowed; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpus); + set_cpus_allowed_ptr(current, cpus); speed = speedstep_get_processor_frequency(speedstep_processor); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); dprintk("detected %u kHz as current frequency\n", speed); return speed; } static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(cpumask_of_cpu(cpu)); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** @@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy, if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(policy->cpus); + freqs.old = _speedstep_get(&policy->cpus); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -285,12 +285,12 @@ static int speedstep_target (struct cpufreq_policy *policy, } /* switch to physical CPU where state is to be changed */ - set_cpus_allowed(current, policy->cpus); + set_cpus_allowed_ptr(current, &policy->cpus); speedstep_set_state(newstate); /* allow to be run on all CPUs */ - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); for_each_cpu_mask(i, policy->cpus) { freqs.cpu = i; @@ -326,7 +326,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) #endif cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, policy->cpus); + set_cpus_allowed_ptr(current, &policy->cpus); /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, @@ -334,12 +334,12 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) &speedstep_freqs[SPEEDSTEP_HIGH].frequency, &policy->cpuinfo.transition_latency, &speedstep_set_state); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); if (result) return result; /* get current speed setting */ - speed = _speedstep_get(policy->cpus); + speed = _speedstep_get(&policy->cpus); if (!speed) return -EIO; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1b889860eb7..26d615dcb14 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -129,7 +129,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; - cpumask_t shared_cpu_map; + cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; unsigned short num_cache_leaves; @@ -451,8 +451,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } /* pointer to _cpuid4_info array (for each cache leaf) */ -static struct _cpuid4_info *cpuid4_info[NR_CPUS]; -#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) +static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -474,7 +474,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); - if (i != cpu && cpuid4_info[i]) { + if (i != cpu && per_cpu(cpuid4_info, i)) { sibling_leaf = CPUID4_INFO_IDX(i, index); cpu_set(cpu, sibling_leaf->shared_cpu_map); } @@ -505,8 +505,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) for (i = 0; i < num_cache_leaves; i++) cache_remove_shared_cpu_map(cpu, i); - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; + kfree(per_cpu(cpuid4_info, cpu)); + per_cpu(cpuid4_info, cpu) = NULL; } static int __cpuinit detect_cache_attributes(unsigned int cpu) @@ -519,13 +519,13 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - cpuid4_info[cpu] = kzalloc( + per_cpu(cpuid4_info, cpu) = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (cpuid4_info[cpu] == NULL) + if (per_cpu(cpuid4_info, cpu) == NULL) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; @@ -542,12 +542,12 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) } cache_shared_cpu_map_setup(cpu, j); } - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); out: if (retval) { - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; + kfree(per_cpu(cpuid4_info, cpu)); + per_cpu(cpuid4_info, cpu) = NULL; } return retval; @@ -561,7 +561,7 @@ out: extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ /* pointer to kobject for cpuX/cache */ -static struct kobject * cache_kobject[NR_CPUS]; +static DEFINE_PER_CPU(struct kobject *, cache_kobject); struct _index_kobject { struct kobject kobj; @@ -570,8 +570,8 @@ struct _index_kobject { }; /* pointer to array of kobjects for cpuX/cache/indexY */ -static struct _index_kobject *index_kobject[NR_CPUS]; -#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) +static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -591,11 +591,32 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) return sprintf (buf, "%luK\n", this_leaf->size / 1024); } -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) +static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, + int type, char *buf) { - char mask_str[NR_CPUS]; - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); - return sprintf(buf, "%s\n", mask_str); + ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; + int n = 0; + + if (len > 1) { + cpumask_t *mask = &this_leaf->shared_cpu_map; + + n = type? + cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); + buf[n++] = '\n'; + buf[n] = '\0'; + } + return n; +} + +static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) +{ + return show_shared_cpu_map_func(leaf, 0, buf); +} + +static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) +{ + return show_shared_cpu_map_func(leaf, 1, buf); } static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { @@ -633,6 +654,7 @@ define_one_ro(ways_of_associativity); define_one_ro(number_of_sets); define_one_ro(size); define_one_ro(shared_cpu_map); +define_one_ro(shared_cpu_list); static struct attribute * default_attrs[] = { &type.attr, @@ -643,6 +665,7 @@ static struct attribute * default_attrs[] = { &number_of_sets.attr, &size.attr, &shared_cpu_map.attr, + &shared_cpu_list.attr, NULL }; @@ -684,10 +707,10 @@ static struct kobj_type ktype_percpu_entry = { static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { - kfree(cache_kobject[cpu]); - kfree(index_kobject[cpu]); - cache_kobject[cpu] = NULL; - index_kobject[cpu] = NULL; + kfree(per_cpu(cache_kobject, cpu)); + kfree(per_cpu(index_kobject, cpu)); + per_cpu(cache_kobject, cpu) = NULL; + per_cpu(index_kobject, cpu) = NULL; free_cache_attributes(cpu); } @@ -703,13 +726,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return err; /* Allocate all required memory */ - cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(cache_kobject[cpu] == NULL)) + per_cpu(cache_kobject, cpu) = + kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) goto err_out; - index_kobject[cpu] = kzalloc( + per_cpu(index_kobject, cpu) = kzalloc( sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); - if (unlikely(index_kobject[cpu] == NULL)) + if (unlikely(per_cpu(index_kobject, cpu) == NULL)) goto err_out; return 0; @@ -733,7 +757,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry, + retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), + &ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); if (retval < 0) { cpuid4_cache_sysfs_exit(cpu); @@ -745,13 +770,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object->cpu = cpu; this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), - &ktype_cache, cache_kobject[cpu], + &ktype_cache, + per_cpu(cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); } - kobject_put(cache_kobject[cpu]); + kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); break; } @@ -760,7 +786,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (!retval) cpu_set(cpu, cache_dev_map); - kobject_uevent(cache_kobject[cpu], KOBJ_ADD); + kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return retval; } @@ -769,7 +795,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - if (cpuid4_info[cpu] == NULL) + if (per_cpu(cpuid4_info, cpu) == NULL) return; if (!cpu_isset(cpu, cache_dev_map)) return; @@ -777,7 +803,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - kobject_put(cache_kobject[cpu]); + kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 32671da8184..7c9a813e119 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -251,18 +251,18 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; -static cpumask_t affinity_set(unsigned int cpu) +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) { - cpumask_t oldmask = current->cpus_allowed; - cpumask_t newmask = CPU_MASK_NONE; - cpu_set(cpu, newmask); - set_cpus_allowed(current, newmask); - return oldmask; + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); } -static void affinity_restore(cpumask_t oldmask) +static void affinity_restore(const cpumask_t *oldmask) { - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, oldmask); } #define SHOW_FIELDS(name) \ @@ -277,15 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - oldmask = affinity_set(b->cpu); + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 0, 0); - affinity_restore(oldmask); + affinity_restore(&oldmask); return end - buf; } @@ -294,7 +294,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask; + cpumask_t oldmask, newmask; u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) @@ -306,9 +306,9 @@ static ssize_t store_threshold_limit(struct threshold_block *b, old = b->threshold_limit; b->threshold_limit = new; - oldmask = affinity_set(b->cpu); + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 0, old); - affinity_restore(oldmask); + affinity_restore(&oldmask); return end - buf; } @@ -316,10 +316,10 @@ static ssize_t store_threshold_limit(struct threshold_block *b, static ssize_t show_error_count(struct threshold_block *b, char *buf) { u32 high, low; - cpumask_t oldmask; - oldmask = affinity_set(b->cpu); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); rdmsr(b->address, low, high); - affinity_restore(oldmask); + affinity_restore(&oldmask); return sprintf(buf, "%x\n", (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } @@ -327,10 +327,10 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf) static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - cpumask_t oldmask; - oldmask = affinity_set(b->cpu); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); threshold_restart_bank(b, 1, 0); - affinity_restore(oldmask); + affinity_restore(&oldmask); return 1; } @@ -468,7 +468,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - cpumask_t oldmask = CPU_MASK_NONE; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -519,10 +519,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - oldmask = affinity_set(cpu); + affinity_set(cpu, &oldmask, &newmask); err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MC0_MISC + bank * 4); - affinity_restore(oldmask); + affinity_restore(&oldmask); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 9b7e01daa1c..1f4cc48c14c 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0978a4a3941..0d0d9057e7c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -1,7 +1,6 @@ #include <linux/smp.h> #include <linux/timex.h> #include <linux/string.h> -#include <asm/semaphore.h> #include <linux/seq_file.h> #include <linux/cpufreq.h> diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 0240cd77836..ed733e7cf4e 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map) /* * Find the highest page frame number we have available */ -void __init find_max_pfn(void) +void __init propagate_e820_map(void) { int i; @@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg) * size before original memory map is * reset. */ - find_max_pfn(); + propagate_e820_map(); saved_max_pfn = max_pfn; #endif e820.nr_map = 0; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 7f6c0c85c8f..cbd42e51cb0 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; @@ -116,7 +116,7 @@ again: } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) { int i; diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 759e02bec07..77d424cf68b 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; + u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - set_memory_x(md->virt_addr, md->num_pages); + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); } } @@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + if (PFN_UP(end) <= max_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); @@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) - set_memory_uc(md->virt_addr, md->num_pages); + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d143a1e76b3..d0060fdccca 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void) void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped; + static unsigned pages_mapped __initdata; unsigned i, pages; + unsigned long offset; - /* phys_addr and size must be page aligned */ - if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) - return NULL; + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; - pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; @@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) } return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)); + (pages_mapped - pages)) + offset; } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9ba49a26dff..f0f8934fc30 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds */ diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 5d77c9cd8e1..ebf13908a74 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); + APIC_DM_INIT; + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + mdelay(10); + + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + APIC_DM_STARTUP; uv_write_global_mmr64(nasid, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid; + unsigned long val, apicid, lapicid; int nasid; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ nasid = uv_apicid_to_nasid(apicid); val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << + (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(nasid, UVH_IPI_INT, val); - printk(KERN_DEBUG - "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n", - cpu, apicid, vector, nasid, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d6d54faa84d..993c7677325 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +#ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data) unsigned long ramdisk_end = ramdisk_image + ramdisk_size; reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } +#endif reserve_ebda_region(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 826988a6e96..90f038af3ad 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. * * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8f8102d967b..db6839b5319 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -35,17 +35,18 @@ #endif static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +static struct i387_fxsave_struct fx_scratch __cpuinitdata; -void mxcsr_feature_mask_init(void) +void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; clts(); if (cpu_has_fxsr) { - memset(¤t->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : : "m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; } @@ -53,6 +54,16 @@ void mxcsr_feature_mask_init(void) stts(); } +void __init init_thread_xstate(void) +{ + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); +#ifdef CONFIG_X86_32 + else + xstate_size = sizeof(struct i387_fsave_struct); +#endif +} + #ifdef CONFIG_X86_64 /* * Called at bootup to set up the initial FPU state that is later cloned @@ -61,10 +72,6 @@ void mxcsr_feature_mask_init(void) void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); - extern void __bad_fxsave_alignment(void); - - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) - __bad_fxsave_alignment(); set_in_cr4(X86_CR4_OSFXSR); set_in_cr4(X86_CR4_OSXMMEXCPT); @@ -84,32 +91,44 @@ void __cpuinit fpu_init(void) * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ -void init_fpu(struct task_struct *tsk) +int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { if (tsk == current) unlazy_fpu(tsk); - return; + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + if (!tsk->thread.xstate) { + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!tsk->thread.xstate) + return -ENOMEM; } if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; + fx->mxcsr = MXCSR_DEFAULT; } else { - memset(&tsk->thread.i387.fsave, 0, - sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; + memset(fp, 0, xstate_size); + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; } /* * Only the device not available exception or ptrace can call init_fpu. */ set_stopped_child_used_math(tsk); + return 0; } int fpregs_active(struct task_struct *target, const struct user_regset *regset) @@ -126,13 +145,17 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + int ret; + if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -144,16 +167,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; return ret; } @@ -233,7 +259,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) static void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -273,7 +299,7 @@ static void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -302,15 +328,19 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct user_i387_ia32_struct env; + int ret; if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, + -1); } if (kbuf && pos == 0 && count == sizeof(env)) { @@ -333,12 +363,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, -1); } if (pos > 0 || count < sizeof(env)) @@ -358,11 +391,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; unlazy_fpu(tsk); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if (__copy_to_user(buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct))) + fp->status = fp->swd; + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; return 1; } @@ -370,6 +403,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; struct user_i387_ia32_struct env; int err = 0; @@ -379,12 +413,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (__copy_to_user(buf, &env, sizeof(env))) return -1; - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); + err |= __put_user(fx->swd, &buf->status); err |= __put_user(X86_FXSR_MAGIC, &buf->magic); if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + if (__copy_to_user(&buf->_fxsr_env[0], fx, sizeof(struct i387_fxsave_struct))) return -1; return 1; @@ -417,7 +451,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; clear_fpu(tsk); - return __copy_from_user(&tsk->thread.i387.fsave, buf, + return __copy_from_user(&tsk->thread.xstate->fsave, buf, sizeof(struct i387_fsave_struct)); } @@ -428,10 +462,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int err; clear_fpu(tsk); - err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) return 1; convert_to_fxsr(tsk, &env); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b54464b2665..9ba11d07920 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -785,7 +785,7 @@ static void __clear_irq_vector(int irq) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cfg->domain = CPU_MASK_NONE; + cpus_clear(cfg->domain); } void __setup_vector_irq(int cpu) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 24362ecf5f9..f47f0eb886b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,11 +46,7 @@ #include <asm/apicdef.h> #include <asm/system.h> -#ifdef CONFIG_X86_32 -# include <mach_ipi.h> -#else -# include <asm/mach_apic.h> -#endif +#include <mach_ipi.h> /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 25cf6dee4e5..69729e38b78 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -402,7 +402,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -416,7 +416,7 @@ out: vfree(new_mc); if (cursor < 0) error = cursor; - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); return error; } @@ -579,7 +579,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -610,7 +610,7 @@ static int apply_microcode_check_cpu(int cpu) " sig=0x%x, pf=0x%x, rev=0x%x\n", cpu, uci->sig, uci->pf, uci->rev); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); return err; } @@ -621,13 +621,13 @@ static void microcode_init_cpu(int cpu, int resume) old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); } static void microcode_fini_cpu(int cpu) @@ -657,14 +657,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); if (uci->valid) err = cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); put_online_cpus(); - set_cpus_allowed(current, old); + set_cpus_allowed_ptr(current, &old); } if (err) return err; diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 8421d0ac6f2..11b14bbaa61 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 11f9130ac51..5a29ded994f 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -313,7 +313,8 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { int sum; int touched = 0; @@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) static unsigned ignore_nmis; -asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1b5464c2434..adb91e4b62d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -470,10 +470,11 @@ error: return 0; } -static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, +static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t dma_handle = bad_dma_address; + void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma.c index ada5a060499..388b113a7d8 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,61 +1,370 @@ -/* - * Dynamic DMA mapping support. - */ - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/module.h> +#include <linux/dma-mapping.h> #include <linux/dmar.h> -#include <asm/io.h> +#include <linux/bootmem.h> +#include <linux/pci.h> + +#include <asm/proto.h> +#include <asm/dma.h> #include <asm/gart.h> #include <asm/calgary.h> -int iommu_merge __read_mostly = 0; - -dma_addr_t bad_dma_address __read_mostly; -EXPORT_SYMBOL(bad_dma_address); +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_bio_merge); +const struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); -static int iommu_sac_force __read_mostly = 0; +int iommu_sac_force __read_mostly = 0; -int no_iommu __read_mostly; #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; #else int panic_on_overflow __read_mostly = 0; -int force_iommu __read_mostly= 0; +int force_iommu __read_mostly = 0; #endif +int iommu_merge __read_mostly = 0; + +int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; +/* This tells the BIO block layer to assume merging. Default to off + because we cannot guarantee merging later. */ +int iommu_bio_merge __read_mostly = 0; +EXPORT_SYMBOL(iommu_bio_merge); + +dma_addr_t bad_dma_address __read_mostly = 0; +EXPORT_SYMBOL(bad_dma_address); + /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ + to older i386. */ struct device fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, .dma_mask = &fallback_dev.coherent_dma_mask, }; +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +#ifdef CONFIG_X86_64 +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + +void __init pci_iommu_alloc(void) +{ + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ +#ifdef CONFIG_GART_IOMMU + gart_iommu_hole_init(); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + detect_calgary(); +#endif + + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} +#endif + +/* + * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter + * documentation. + */ +static __init int iommu_setup(char *p) +{ + iommu_merge = 1; + + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "off", 3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p, "force", 5)) + force_iommu = 1; + if (!strncmp(p, "noforce", 7)) { + iommu_merge = 0; + force_iommu = 0; + } + + if (!strncmp(p, "biomerge", 8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic", 5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic", 7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge", 5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge", 7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac", 8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; + if (!strncmp(p, "usedac", 6)) { + forbid_dac = -1; + return 1; + } +#ifdef CONFIG_SWIOTLB + if (!strncmp(p, "soft", 4)) + swiotlb = 1; +#endif + +#ifdef CONFIG_GART_IOMMU + gart_parse_options(p); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; +} +early_param("iommu", iommu_setup); + +#ifdef CONFIG_X86_32 +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} +#else +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) +#endif /* CONFIG_X86_32 */ + +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", + dev->bus_id); + return 0; + } +#endif + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + printk(KERN_INFO "%s: Force SAC with mask %Lx\n", + dev->bus_id, mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(dma_supported); + /* Allocate DMA memory on node near device */ -noinline static void * +noinline struct page * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { - struct page *page; int node; node = dev_to_node(dev); - page = alloc_pages_node(node, gfp, order); - return page ? page_address(page) : NULL; + return alloc_pages_node(node, gfp, order); } /* @@ -65,9 +374,16 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - void *memory; + void *memory = NULL; + struct page *page; unsigned long dma_mask = 0; - u64 bus; + dma_addr_t bus; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; if (!dev) dev = &fallback_dev; @@ -82,26 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - /* Kludge to make it bug-to-bug compatible with i386. i386 - uses the normal dma_mask for alloc_coherent. */ - dma_mask &= *dev->dma_mask; - +#ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If not retry with true GFP_DMA. -AK */ if (dma_mask <= DMA_32BIT_MASK) gfp |= GFP_DMA32; +#endif again: - memory = dma_alloc_pages(dev, gfp, get_order(size)); - if (memory == NULL) + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) return NULL; { int high, mmu; - bus = virt_to_bus(memory); - high = (bus + size) >= dma_mask; + bus = page_to_phys(page); + memory = page_address(page); + high = (bus + size) >= dma_mask; mmu = high; if (force_iommu && !(gfp & GFP_DMA)) mmu = 1; @@ -127,7 +442,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, memset(memory, 0, size); if (!mmu) { - *dma_handle = virt_to_bus(memory); + *dma_handle = bus; return memory; } } @@ -139,7 +454,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, memory, + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -147,7 +462,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); free_pages((unsigned long)memory, get_order(size)); return NULL; } @@ -160,153 +476,16 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, get_order(size)); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -static int forbid_dac __read_mostly; - -int dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - - - - printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); - return 0; - } -#endif - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) - return 0; - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { - printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(dma_supported); - -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - -/* - * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter - * documentation. - */ -static __init int iommu_setup(char *p) -{ - iommu_merge = 1; - - if (!p) - return -EINVAL; - - while (*p) { - if (!strncmp(p, "off", 3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p, "force", 5)) - force_iommu = 1; - if (!strncmp(p, "noforce", 7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge", 8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic", 5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic", 7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge", 5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge", 7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; - if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; - if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; - -#ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft", 4)) - swiotlb = 1; -#endif - -#ifdef CONFIG_GART_IOMMU - gart_parse_options(p); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary = 1; -#endif /* CONFIG_CALGARY_IOMMU */ - - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; -} -early_param("iommu", iommu_setup); - -void __init pci_iommu_alloc(void) -{ - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ -#ifdef CONFIG_GART_IOMMU - gart_iommu_hole_init(); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - detect_calgary(); -#endif - - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} - static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU @@ -327,6 +506,8 @@ void pci_iommu_shutdown(void) { gart_iommu_shutdown(); } +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ @@ -334,11 +515,10 @@ void pci_iommu_shutdown(void) static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); + printk(KERN_INFO "PCI: VIA PCI bridge detected." + "Disabling DAC.\n"); forbid_dac = 1; } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); #endif -/* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c deleted file mode 100644 index 51330321a5d..00000000000 --- a/arch/x86/kernel/pci-dma_32.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> -#include <linux/module.h> -#include <asm/io.h> - -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - - ret = (void *)__get_free_pages(gfp, order); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - WARN_ON(irqs_disabled()); /* for portability */ - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 700e4647dd3..c07455d1695 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, } static dma_addr_t -gart_map_simple(struct device *dev, char *buf, size_t size, int dir) +gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + dma_addr_t map = dma_map_area(dev, paddr, size, dir); flush_gart(); @@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir) /* Map a single area into the IOMMU */ static dma_addr_t -gart_map_single(struct device *dev, void *addr, size_t size, int dir) +gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - unsigned long phys_mem, bus; + unsigned long bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); - if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + if (!need_iommu(dev, paddr, size)) + return paddr; - bus = gart_map_simple(dev, addr, size, dir); + bus = gart_map_simple(dev, paddr, size, dir); return bus; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu.c index ab08e183222..aec43d56f49 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && bus + size > *hwdev->dma_mask) { + if (hwdev && bus + size > *hwdev->dma_mask) { if (*hwdev->dma_mask >= DMA_32BIT_MASK) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -26,19 +26,17 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) } static dma_addr_t -nommu_map_single(struct device *hwdev, void *ptr, size_t size, +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t bus = virt_to_bus(ptr); + dma_addr_t bus = paddr; + WARN_ON(size == 0); if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; + flush_write_buffers(); return bus; } -static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction) -{ -} /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the @@ -61,30 +59,34 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, struct scatterlist *s; int i; + WARN_ON(nents == 0 || sg[0].length == 0); + for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); - s->dma_address = virt_to_bus(sg_virt(s)); + s->dma_address = sg_phys(s); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; } + flush_write_buffers(); return nents; } -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) { +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif } + const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, - .unmap_single = nommu_unmap_single, .map_sg = nommu_map_sg, - .unmap_sg = nommu_unmap_sg, + .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82a0a674a00..490da7f4b8d 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -11,11 +11,18 @@ int swiotlb __read_mostly; +static dma_addr_t +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); +} + const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, + .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c new file mode 100644 index 00000000000..3004d716539 --- /dev/null +++ b/arch/x86/kernel/process.c @@ -0,0 +1,44 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/sched.h> + +struct kmem_cache *task_xstate_cachep; + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + *dst = *src; + if (src->thread.xstate) { + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + } + return 0; +} + +void free_thread_xstate(struct task_struct *tsk) +{ + if (tsk->thread.xstate) { + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); + tsk->thread.xstate = NULL; + } +} + +void free_thread_info(struct thread_info *ti) +{ + free_thread_xstate(ti->task); + free_pages((unsigned long)ti, get_order(THREAD_SIZE)); +} + +void arch_task_cache_init(void) +{ + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC, NULL); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3903a8f2eb9..7adad088e37 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include <linux/personality.h> #include <linux/tick.h> #include <linux/percpu.h> +#include <linux/prctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -45,7 +46,6 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/desc.h> -#include <asm/vm86.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif @@ -521,14 +521,18 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { write_cr4(read_cr4() | X86_CR4_TSD); } + void disable_TSC(void) { preempt_disable(); @@ -540,11 +544,47 @@ void disable_TSC(void) hard_disable_TSC(); preempt_enable(); } + static void hard_enable_TSC(void) { write_cr4(read_cr4() & ~X86_CR4_TSD); } -#endif /* CONFIG_SECCOMP */ + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, @@ -578,7 +618,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_debugreg(next->debugreg7, 7); } -#ifdef CONFIG_SECCOMP if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ @@ -587,7 +626,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else hard_enable_TSC(); } -#endif #ifdef X86_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) @@ -669,7 +707,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e75ccc8a2b8..891af1a1b48 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -36,6 +36,7 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/tick.h> +#include <linux/prctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -532,9 +533,71 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ss = __USER_DS; regs->flags = 0x200; set_fs(USER_DS); + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + /* * This special macro can be used to load a debugging register */ @@ -572,6 +635,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, loaddebug(next, 7); } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. @@ -614,7 +686,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter>5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0, LDT and the page table pointer: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9692202d3bf..19c9386ac11 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -420,7 +420,7 @@ static void native_machine_shutdown(void) reboot_cpu_id = smp_processor_id(); /* Make certain I only run on the appropriate processor */ - set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ed157c90412..0d1f44ae6ee 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -54,6 +54,24 @@ static void __init setup_per_cpu_maps(void) #endif } +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +cpumask_t *cpumask_of_cpu_map __read_mostly; +EXPORT_SYMBOL(cpumask_of_cpu_map); + +/* requires nr_cpu_ids to be initialized */ +static void __init setup_cpumask_of_cpu(void) +{ + int i; + + /* alloc_bootmem zeroes memory */ + cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); + for (i = 0; i < nr_cpu_ids; i++) + cpu_set(i, cpumask_of_cpu_map[i]); +} +#else +static inline void setup_cpumask_of_cpu(void) { } +#endif + #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -70,7 +88,7 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - int i; + int i, highest_cpu = 0; unsigned long size; #ifdef CONFIG_HOTPLUG_CPU @@ -104,10 +122,18 @@ void __init setup_per_cpu_areas(void) __per_cpu_offset[i] = ptr - __per_cpu_start; #endif memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + highest_cpu = i; } + nr_cpu_ids = highest_cpu + 1; + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); + + /* Setup cpumask_of_cpu map */ + setup_cpumask_of_cpu(); } #endif diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 9042fb0e36f..aee0e820077 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -74,8 +74,8 @@ int force_personality32 = 0; Control non executable heap for 32bit processes. To control the stack too use noexec=off -on PROT_READ does not imply PROT_EXEC for 32bit processes -off PROT_READ implies PROT_EXEC (default) +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC */ static int __init nonx32_setup(char *str) { diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 4ef91749959..78828b0f604 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -815,10 +815,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* update e820 for memory not covered by WB MTRRs */ - find_max_pfn(); + propagate_e820_map(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - find_max_pfn(); + propagate_e820_map(); max_low_pfn = setup_memory(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 216c93bd999..c2ec3dcb6b9 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -399,6 +399,8 @@ void __init setup_arch(char **cmdline_p) early_res_to_bootmem(); + dma32_reserve_bootmem(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -421,11 +423,14 @@ void __init setup_arch(char **cmdline_p) unsigned long end_of_mem = end_pfn << PAGE_SHIFT; if (ramdisk_end <= end_of_mem) { - reserve_bootmem_generic(ramdisk_image, ramdisk_size); + /* + * don't need to reserve again, already reserved early + * in x86_64_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; } else { - /* Assumes everything on node 0 */ free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e6abe8a49b1..6a925394bc7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -61,6 +61,7 @@ #include <asm/mtrr.h> #include <asm/nmi.h> #include <asm/vmi.h> +#include <asm/genapic.h> #include <linux/mc146818rtc.h> #include <mach_apic.h> @@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { + send_status = uv_wakeup_secondary(phys_apicid, start_eip); + atomic_set(&init_deasserted, 1); + return send_status; + } + /* * Be paranoid about clearing APIC errors. */ @@ -918,16 +925,19 @@ do_rest: atomic_set(&init_deasserted, 0); - Dprintk("Setting warm reset code and vector.\n"); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - store_NMI_vector(&nmi_high, &nmi_low); + Dprintk("Setting warm reset code and vector.\n"); - smpboot_setup_warm_reset_vector(start_ip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } /* * Starting actual IPI sequence... @@ -966,7 +976,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - inquire_remote_apic(apicid); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) + inquire_remote_apic(apicid); } } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 65791ca2824..471e694d671 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -681,7 +681,7 @@ gp_in_kernel: } } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG @@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) clear_mem_error(reason); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs *regs, long error_code) +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; @@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + clts(); /* Allow maths ops (or we recurse) */ - if (!tsk_used_math(tsk)) - init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; @@ -1208,11 +1221,6 @@ void __init trap_init(void) #endif set_trap_gate(19, &simd_coprocessor_error); - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generate a build-time error if the alignment is wrong. - */ - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); @@ -1233,6 +1241,7 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 79aa6fc0815..adff76ea97c 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err) oops_end(flags, regs, SIGSEGV); } -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; @@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, die("general protection fault", regs, error_code); } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", @@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); @@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; @@ -1123,11 +1124,24 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) asmlinkage void math_state_restore(void) { struct task_struct *me = current; - clts(); /* Allow maths ops (or we recurse) */ - if (!used_math()) - init_fpu(me); - restore_fpu_checking(&me->thread.i387.fxsave); + if (!used_math()) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(me)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + clts(); /* Allow maths ops (or we recurse) */ + restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } @@ -1163,6 +1177,10 @@ void __init trap_init(void) #endif /* + * initialize the per thread extended state: + */ + init_thread_xstate(); + /* * Should be a barrier for any external CPU state. */ cpu_init(); diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3d7e6e9fa6c..e4790728b22 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); * if the CPU frequency is scaled, TSC-based delays will need a different * loops_per_jiffy value to function properly. */ -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long cpu_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -283,15 +283,28 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz = 0; +static unsigned long current_tsc_khz; +static struct clocksource clocksource_tsc; +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp issue. This can be observed in + * a very small window right after one CPU updated cycle_last under + * xtime lock and the other CPU reads a TSC value which is smaller + * than the cycle_last reference value due to a TSC which is slighty + * behind. This delta is nowhere else observable, but in that case it + * results in a forward time jump in the range of hours due to the + * unsigned delta calculation of the time keeping core code, which is + * necessary to support wrapping clocksources like pm timer. + */ static cycle_t read_tsc(void) { cycle_t ret; rdtscll(ret); - return ret; + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index ceeba01e7f4..fcc16e58609 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -11,6 +11,7 @@ #include <asm/hpet.h> #include <asm/timex.h> #include <asm/timer.h> +#include <asm/vgtod.h> static int notsc __initdata = 0; @@ -287,18 +288,34 @@ int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +static struct clocksource clocksource_tsc; -/* clock source code: */ +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ static cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles(); - return ret; + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); - return ret; + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index 710faf71a65..cef9cb1d15a 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/mach-visws/visws_apic.c - * * Copyright (C) 1999 Bent Hagemark, Ingo Molnar * * SGI Visual Workstation interrupt controller diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c index 6a949e4edde..46d6f806769 100644 --- a/arch/x86/mach-voyager/voyager_basic.c +++ b/arch/x86/mach-voyager/voyager_basic.c @@ -2,8 +2,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager.c - * * This file contains all the voyager specific routines for getting * initialisation of the architecture to function. For additional * features see: diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index 17a7904f75b..ecab9fff0fd 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_cat.c - * * This file contains all the logic for manipulating the CAT bus * in a level 5 machine. * diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index be7235bf105..96f60c7cd12 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_smp.c - * * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c index c69c931818e..15464a20fb3 100644 --- a/arch/x86/mach-voyager/voyager_thread.c +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_thread.c - * * This module provides the machine status monitor thread for the * voyager architecture. This allows us to monitor the machine * environment (temp, voltage, fan function) and the front panel and diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 4bab3b14539..6e38d877ea7 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; @@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index a3ae28c49dd..13488fa153e 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -35,8 +35,8 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.i387) -#define FPU_info (I387.soft.info) +#define I387 (current->thread.xstate) +#define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) #define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) @@ -46,25 +46,25 @@ #define FPU_EIP (FPU_info->___eip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) -#define FPU_lookahead (I387.soft.lookahead) +#define FPU_lookahead (I387->soft.lookahead) /* nz if ip_offset and cs_selector are not to be set for the current instruction. */ -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) -#define FPU_rm (*(u_char *)&(I387.soft.rm)) +#define no_ip_update (*(u_char *)&(I387->soft.no_update)) +#define FPU_rm (*(u_char *)&(I387->soft.rm)) /* Number of bytes of data which can be legally accessed by the current instruction. This only needs to hold a number <= 108, so a byte will do. */ -#define access_limit (*(u_char *)&(I387.soft.alimit)) +#define access_limit (*(u_char *)&(I387->soft.alimit)) -#define partial_status (I387.soft.swd) -#define control_word (I387.soft.cwd) -#define fpu_tag_word (I387.soft.twd) -#define registers (I387.soft.st_space) -#define top (I387.soft.ftop) +#define partial_status (I387->soft.swd) +#define control_word (I387->soft.cwd) +#define fpu_tag_word (I387->soft.twd) +#define registers (I387->soft.st_space) +#define top (I387->soft.ftop) -#define instruction_address (*(struct address *)&I387.soft.fip) -#define operand_address (*(struct address *)&I387.soft.foo) +#define instruction_address (*(struct address *)&I387->soft.fip) +#define operand_address (*(struct address *)&I387->soft.foo) #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ math_abort(FPU_info,SIGSEGV) diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index 02af772a24d..d597fe7423c 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) control_word |= 0xffff0040; partial_status = status_word() | 0xffff0000; fpu_tag_word |= 0xffff0000; - I387.soft.fcs &= ~0xf8000000; - I387.soft.fos |= 0xffff0000; + I387->soft.fcs &= ~0xf8000000; + I387->soft.fos |= 0xffff0000; #endif /* PECULIAR_486 */ if (__copy_to_user(d, &control_word, 7 * 4)) FPU_abort; diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index eba0bbede7a..18378850e25 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void) printk("NUMA - single node, flat memory mode\n"); /* Run the memory configuration and find the top of memory. */ - find_max_pfn(); + propagate_e820_map(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; memory_present(0, 0, max_pfn); @@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void) /* * Find the highest page frame number we have available for the node */ -static void __init find_max_pfn_node(int nid) +static void __init propagate_e820_map_node(int nid) { if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; @@ -379,7 +379,7 @@ unsigned long __init setup_memory(void) printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); for_each_online_node(nid) - find_max_pfn_node(nid); + propagate_e820_map_node(nid); memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); NODE_DATA(0)->bdata = &node0_bdata; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1500dc8d63e..9ec62da85fd 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds * diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1076097dcab..1ff7906a9a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -47,9 +47,6 @@ #include <asm/numa.h> #include <asm/cacheflush.h> -const struct dma_mapping_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c590fd200e2..3a4baf95e24 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, if (!phys_addr_valid(phys_addr)) { printk(KERN_WARNING "ioremap: invalid physical address %llx\n", - phys_addr); + (unsigned long long)phys_addr); WARN_ON_ONCE(1); return NULL; } @@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, new_prot_val == _PAGE_CACHE_WB)) { pr_debug( "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", - phys_addr, phys_addr + size, + (unsigned long long)phys_addr, + (unsigned long long)(phys_addr + size), prot_val, new_prot_val); free_memtype(phys_addr, phys_addr + size); return NULL; diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 7a2ebce87df..86808e666f9 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) if (!found) return -1; - memnode_shift = compute_hash_shift(nodes, 8); + memnode_shift = compute_hash_shift(nodes, 8, NULL); if (memnode_shift < 0) { printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); return -1; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2ea56f48f29..9a6892200b2 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size; * -1 if node overlap or lost ram (shift too big) */ static int __init populate_memnodemap(const struct bootnode *nodes, - int numnodes, int shift) + int numnodes, int shift, int *nodeids) { unsigned long addr, end; int i, res = -1; @@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes, do { if (memnodemap[addr >> shift] != NUMA_NO_NODE) return -1; - memnodemap[addr >> shift] = i; + + if (!nodeids) + memnodemap[addr >> shift] = i; + else + memnodemap[addr >> shift] = nodeids[i]; + addr += (1UL << shift); } while (addr < end); res = 1; @@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, return i; } -int __init compute_hash_shift(struct bootnode *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes, + int *nodeids) { int shift; @@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); - if (populate_memnodemap(nodes, numnodes, shift) != 1) { + if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { printk(KERN_INFO "Your memory is not aligned you need to " "rebuild your kernel with a bigger NODEMAPSIZE " "shift=%d\n", shift); @@ -380,9 +386,10 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, * Sets up the system RAM area from start_pfn to end_pfn according to the * numa=fake command-line option. */ +static struct bootnode nodes[MAX_NUMNODES] __initdata; + static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) { - struct bootnode nodes[MAX_NUMNODES]; u64 size, addr = start_pfn << PAGE_SHIFT; u64 max_addr = end_pfn << PAGE_SHIFT; int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; @@ -462,7 +469,7 @@ done: } } out: - memnode_shift = compute_hash_shift(nodes, num_nodes); + memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); if (memnode_shift < 0) { memnode_shift = 0; printk(KERN_ERR "No NUMA hash function found. NUMA emulation " diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 3165ec0672b..6fb9e7c6893 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -1,7 +1,3 @@ -/* - * linux/arch/i386/mm/pgtable.c - */ - #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1bae9c855ce..fb43d89f46f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; int hotadd_percent __initdata = 0; +static int num_node_memblks __initdata; +static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; +static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; + /* Too small nodes confuse the VM badly. Usually they result from BIOS bugs. */ #define NODE_MIN_SIZE (4*1024*1024) @@ -41,17 +45,17 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -static __init int conflicting_nodes(unsigned long start, unsigned long end) +static __init int conflicting_memblks(unsigned long start, unsigned long end) { int i; - for_each_node_mask(i, nodes_parsed) { - struct bootnode *nd = &nodes[i]; + for (i = 0; i < num_node_memblks; i++) { + struct bootnode *nd = &node_memblk_range[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return i; + return memblk_nodeid[i]; if (nd->end == end && nd->start == start) - return i; + return memblk_nodeid[i]; } return -1; } @@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) bad_srat(); return; } - i = conflicting_nodes(start, end); + i = conflicting_memblks(start, end); if (i == node) { printk(KERN_WARNING "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", @@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) nd->end = end; } - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - nd->start, nd->end); - e820_register_active_regions(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); + printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, + start, end); + e820_register_active_regions(node, start >> PAGE_SHIFT, + end >> PAGE_SHIFT); push_node_boundaries(node, nd->start >> PAGE_SHIFT, nd->end >> PAGE_SHIFT); @@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); } + + node_memblk_range[num_node_memblks].start = start; + node_memblk_range[num_node_memblks].end = end; + memblk_nodeid[num_node_memblks] = node; + num_node_memblks++; } /* Sanity check to catch more bad SRATs (they are amazingly common). @@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, + memblk_nodeid); if (memnode_shift < 0) { printk(KERN_ERR "SRAT: No NUMA node hash function found. Contact maintainer\n"); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 1f11cf0a307..cc48d3fde54 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -23,8 +23,8 @@ #include "op_x86_model.h" static struct op_x86_model_spec const *model; -static struct op_msrs cpu_msrs[NR_CPUS]; -static unsigned long saved_lvtpc[NR_CPUS]; +static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); +static DEFINE_PER_CPU(unsigned long, saved_lvtpc); static int nmi_start(void); static void nmi_stop(void); @@ -89,7 +89,7 @@ static int profile_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_NMI: - if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) + if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) ret = NOTIFY_STOP; break; default: @@ -126,7 +126,7 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) static void nmi_save_registers(void *dummy) { int cpu = smp_processor_id(); - struct op_msrs *msrs = &cpu_msrs[cpu]; + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); nmi_cpu_save_registers(msrs); } @@ -134,10 +134,10 @@ static void free_msrs(void) { int i; for_each_possible_cpu(i) { - kfree(cpu_msrs[i].counters); - cpu_msrs[i].counters = NULL; - kfree(cpu_msrs[i].controls); - cpu_msrs[i].controls = NULL; + kfree(per_cpu(cpu_msrs, i).counters); + per_cpu(cpu_msrs, i).counters = NULL; + kfree(per_cpu(cpu_msrs, i).controls); + per_cpu(cpu_msrs, i).controls = NULL; } } @@ -149,13 +149,15 @@ static int allocate_msrs(void) int i; for_each_possible_cpu(i) { - cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); - if (!cpu_msrs[i].counters) { + per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).counters) { success = 0; break; } - cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL); - if (!cpu_msrs[i].controls) { + per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).controls) { success = 0; break; } @@ -170,11 +172,11 @@ static int allocate_msrs(void) static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); - struct op_msrs *msrs = &cpu_msrs[cpu]; + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); spin_lock(&oprofilefs_lock); model->setup_ctrs(msrs); spin_unlock(&oprofilefs_lock); - saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); } @@ -203,13 +205,15 @@ static int nmi_setup(void) */ /* Assume saved/restored counters are the same on all CPUs */ - model->fill_in_addresses(&cpu_msrs[0]); + model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); for_each_possible_cpu(cpu) { if (cpu != 0) { - memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters, + memcpy(per_cpu(cpu_msrs, cpu).counters, + per_cpu(cpu_msrs, 0).counters, sizeof(struct op_msr) * model->num_counters); - memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls, + memcpy(per_cpu(cpu_msrs, cpu).controls, + per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); } @@ -249,7 +253,7 @@ static void nmi_cpu_shutdown(void *dummy) { unsigned int v; int cpu = smp_processor_id(); - struct op_msrs *msrs = &cpu_msrs[cpu]; + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); /* restoring APIC_LVTPC can trigger an apic error because the delivery * mode and vector nr combination can be illegal. That's by design: on @@ -258,23 +262,24 @@ static void nmi_cpu_shutdown(void *dummy) */ v = apic_read(APIC_LVTERR); apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_restore_registers(msrs); } static void nmi_shutdown(void) { + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); unregister_die_notifier(&profile_exceptions_nb); - model->shutdown(cpu_msrs); + model->shutdown(msrs); free_msrs(); } static void nmi_cpu_start(void *dummy) { - struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()]; + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); model->start(msrs); } @@ -286,7 +291,7 @@ static int nmi_start(void) static void nmi_cpu_stop(void *dummy) { - struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()]; + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); model->stop(msrs); } diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 17a6b057856..b7ad9f89d21 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -37,7 +37,8 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(vobjs): KBUILD_CFLAGS += $(CFL) diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 48fb38d7d2c..4db42bff8c6 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -1,5 +1,4 @@ /* - * arch/i386/video/fbdev.c - i386 Framebuffer * * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> * |