aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig112
-rw-r--r--arch/x86/Kconfig.cpu31
-rw-r--r--arch/x86/boot/boot.h10
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/misc.c10
-rw-r--r--arch/x86/boot/compressed/relocs.c2
-rw-r--r--arch/x86/boot/cpu.c3
-rw-r--r--arch/x86/boot/cpucheck.c18
-rw-r--r--arch/x86/boot/header.S1
-rw-r--r--arch/x86/boot/main.c5
-rw-r--r--arch/x86/boot/memory.c1
-rw-r--r--arch/x86/configs/i386_defconfig316
-rw-r--r--arch/x86/configs/x86_64_defconfig279
-rw-r--r--arch/x86/ia32/ia32_aout.c11
-rw-r--r--arch/x86/ia32/ia32_signal.c21
-rw-r--r--arch/x86/ia32/sys_ia32.c9
-rw-r--r--arch/x86/kernel/acpi/boot.c33
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/alternative.c44
-rw-r--r--arch/x86/kernel/amd_iommu.c369
-rw-r--r--arch/x86/kernel/amd_iommu_init.c216
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apic_32.c8
-rw-r--r--arch/x86/kernel/apic_64.c7
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/bios_uv.c10
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c17
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/centaur.c11
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/common_64.c87
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c109
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/x86/kernel/cpu/cyrix.c50
-rw-r--r--arch/x86/kernel/cpu/feature_names.c3
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c15
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c279
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c94
-rw-r--r--arch/x86/kernel/cpuid.c16
-rw-r--r--arch/x86/kernel/crash_dump_64.c13
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/efi.c6
-rw-r--r--arch/x86/kernel/efi_32.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c10
-rw-r--r--arch/x86/kernel/head64.c6
-rw-r--r--arch/x86/kernel/head_32.S34
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/hpet.c43
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/k8.c5
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c50
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c20
-rw-r--r--arch/x86/kernel/mfgpt_32.c52
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c2
-rw-r--r--arch/x86/kernel/mpparse.c6
-rw-r--r--arch/x86/kernel/msr.c40
-rw-r--r--arch/x86/kernel/nmi.c39
-rw-r--r--arch/x86/kernel/numaq_32.c2
-rw-r--r--arch/x86/kernel/olpc.c6
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c34
-rw-r--r--arch/x86/kernel/pci-dma.c179
-rw-r--r--arch/x86/kernel/pci-gart_64.c162
-rw-r--r--arch/x86/kernel/pci-nommu.c10
-rw-r--r--arch/x86/kernel/pcspeaker.c13
-rw-r--r--arch/x86/kernel/process.c20
-rw-r--r--arch/x86/kernel/process_32.c67
-rw-r--r--arch/x86/kernel/process_64.c176
-rw-r--r--arch/x86/kernel/ptrace.c478
-rw-r--r--arch/x86/kernel/reboot.c6
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S10
-rw-r--r--arch/x86/kernel/setup.c39
-rw-r--r--arch/x86/kernel/sigframe.h5
-rw-r--r--arch/x86/kernel/signal_32.c11
-rw-r--r--arch/x86/kernel/signal_64.c111
-rw-r--r--arch/x86/kernel/smpboot.c71
-rw-r--r--arch/x86/kernel/smpcommon.c17
-rw-r--r--arch/x86/kernel/sys_x86_64.c43
-rw-r--r--arch/x86/kernel/tlb_uv.c3
-rw-r--r--arch/x86/kernel/traps_64.c66
-rw-r--r--arch/x86/kernel/tsc.c424
-rw-r--r--arch/x86/kernel/tsc_sync.c6
-rw-r--r--arch/x86/kernel/visws_quirks.c22
-rw-r--r--arch/x86/kernel/vmi_32.c12
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
-rw-r--r--arch/x86/kernel/vsmp_64.c2
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/vmx.c3
-rw-r--r--arch/x86/kvm/vmx.h2
-rw-r--r--arch/x86/lib/msr-on-cpu.c76
-rw-r--r--arch/x86/lib/string_32.c42
-rw-r--r--arch/x86/lib/strstr_32.c6
-rw-r--r--arch/x86/mach-rdc321x/platform.c1
-rw-r--r--arch/x86/mm/discontig_32.c2
-rw-r--r--arch/x86/mm/dump_pagetables.c4
-rw-r--r--arch/x86/mm/init_32.c88
-rw-r--r--arch/x86/mm/init_64.c154
-rw-r--r--arch/x86/mm/ioremap.c29
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/numa_64.c10
-rw-r--r--arch/x86/mm/pageattr-test.c12
-rw-r--r--arch/x86/mm/pageattr.c490
-rw-r--r--arch/x86/mm/pat.c182
-rw-r--r--arch/x86/mm/pgtable.c6
-rw-r--r--arch/x86/mm/pgtable_32.c3
-rw-r--r--arch/x86/mm/srat_32.c12
-rw-r--r--arch/x86/oprofile/nmi_int.c43
-rw-r--r--arch/x86/oprofile/op_model_p4.c175
-rw-r--r--arch/x86/pci/amd_bus.c52
-rw-r--r--arch/x86/pci/i386.c9
-rw-r--r--arch/x86/pci/irq.c69
-rw-r--r--arch/x86/pci/legacy.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c67
-rw-r--r--arch/x86/power/cpu_32.c6
-rw-r--r--arch/x86/power/hibernate_asm_32.S40
-rw-r--r--arch/x86/xen/enlighten.c22
-rw-r--r--arch/x86/xen/setup.c2
132 files changed, 4730 insertions, 2512 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53ab3687873..44d4f2130d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
+ select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -553,6 +554,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
+ select PCI_MSI
depends on X86_64 && PCI && ACPI
help
With this option you can enable support for AMD IOMMU hardware in
@@ -577,35 +579,29 @@ config SWIOTLB
config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
- depends on X86_64 && SMP
+ depends on X86_64 && SMP && BROKEN
default n
help
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
-if MAXSMP
config NR_CPUS
- int
- default "4096"
-endif
-
-if !MAXSMP
-config NR_CPUS
- int "Maximum number of CPUs (2-4096)"
- range 2 4096
+ int "Maximum number of CPUs (2-512)" if !MAXSMP
+ range 2 512
depends on SMP
+ default "4096" if MAXSMP
default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
default "8"
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. The maximum supported value is 4096 and the
+ kernel will support. The maximum supported value is 512 and the
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
-endif
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@@ -951,9 +947,9 @@ config NUMA
local memory controller of the CPU and add some more
NUMA awareness to the kernel.
- For i386 this is currently highly experimental and should be only
+ For 32-bit this is currently highly experimental and should be only
used for kernel development. It might also cause boot failures.
- For x86_64 this is recommended on all multiprocessor Opteron systems.
+ For 64-bit this is recommended on all multiprocessor Opteron systems.
If the system is EM64T, you should say N unless your system is
EM64T NUMA.
@@ -996,17 +992,10 @@ config NUMA_EMU
into virtual nodes when booted with "numa=fake=N", where N is the
number of nodes. This is only useful for debugging.
-if MAXSMP
-
config NODES_SHIFT
- int
- default "9"
-endif
-
-if !MAXSMP
-config NODES_SHIFT
- int "Maximum NUMA Nodes (as a power of 2)"
+ int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
range 1 9 if X86_64
+ default "9" if MAXSMP
default "6" if X86_64
default "4" if X86_NUMAQ
default "3"
@@ -1014,7 +1003,6 @@ config NODES_SHIFT
help
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accomodate various tables.
-endif
config HAVE_ARCH_BOOTMEM_NODE
def_bool y
@@ -1034,7 +1022,7 @@ config HAVE_ARCH_ALLOC_REMAP
config ARCH_FLATMEM_ENABLE
def_bool y
- depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
+ depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
@@ -1050,7 +1038,7 @@ config ARCH_SPARSEMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
+ depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
@@ -1131,10 +1119,10 @@ config MTRR
You can safely say Y even if your machine doesn't have MTRRs, you'll
just add about 9 KB to your kernel.
- See <file:Documentation/mtrr.txt> for more information.
+ See <file:Documentation/x86/mtrr.txt> for more information.
config MTRR_SANITIZER
- bool
+ def_bool y
prompt "MTRR cleanup support"
depends on MTRR
help
@@ -1145,7 +1133,7 @@ config MTRR_SANITIZER
The largest mtrr entry size for a continous block can be set with
mtrr_chunk_size.
- If unsure, say N.
+ If unsure, say Y.
config MTRR_SANITIZER_ENABLE_DEFAULT
int "MTRR cleanup enable value (0-1)"
@@ -1205,7 +1193,6 @@ config IRQBALANCE
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
@@ -1213,7 +1200,7 @@ config SECCOMP
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
+ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
@@ -1263,7 +1250,7 @@ config KEXEC
strongly in flux, so no good recommendation can be made.
config CRASH_DUMP
- bool "kernel crash dumps (EXPERIMENTAL)"
+ bool "kernel crash dumps"
depends on X86_64 || (X86_32 && HIGHMEM)
help
Generate crash dump after being started by kexec.
@@ -1370,14 +1357,14 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing.
config HOTPLUG_CPU
- bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
- depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
+ bool "Support for hot-pluggable CPUs"
+ depends on SMP && HOTPLUG && !X86_VOYAGER
---help---
- Say Y here to experiment with turning CPUs off and on, and to
- enable suspend on SMP systems. CPUs can be controlled through
- /sys/devices/system/cpu.
- Say N if you want to disable CPU hotplug and don't need to
- suspend.
+ Say Y here to allow turning CPUs off and on. CPUs can be
+ controlled through /sys/devices/system/cpu.
+ ( Note: power management support will enable this option
+ automatically on SMP systems. )
+ Say N if you want to disable CPU hotplug.
config COMPAT_VDSO
def_bool y
@@ -1392,6 +1379,51 @@ config COMPAT_VDSO
If unsure, say Y.
+config CMDLINE_BOOL
+ bool "Built-in kernel command line"
+ default n
+ help
+ Allow for specifying boot arguments to the kernel at
+ build time. On some systems (e.g. embedded ones), it is
+ necessary or convenient to provide some or all of the
+ kernel boot arguments with the kernel itself (that is,
+ to not rely on the boot loader to provide them.)
+
+ To compile command line arguments into the kernel,
+ set this option to 'Y', then fill in the
+ the boot arguments in CONFIG_CMDLINE.
+
+ Systems with fully functional boot loaders (i.e. non-embedded)
+ should leave this option set to 'N'.
+
+config CMDLINE
+ string "Built-in kernel command string"
+ depends on CMDLINE_BOOL
+ default ""
+ help
+ Enter arguments here that should be compiled into the kernel
+ image and used at boot time. If the boot loader provides a
+ command line at boot time, it is appended to this string to
+ form the full kernel command line, when the system boots.
+
+ However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+ change this behavior.
+
+ In most cases, the command line (whether built-in or provided
+ by the boot loader) should specify the device for the root
+ file system.
+
+config CMDLINE_OVERRIDE
+ bool "Built-in command line overrides boot loader arguments"
+ default n
+ depends on CMDLINE_BOOL
+ help
+ Set this option to 'Y' to have the kernel ignore the boot loader
+ command line, and use ONLY the built-in command line.
+
+ This is used to work around broken boot loaders. This should
+ be set to 'N' under normal conditions.
+
endmenu
config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1795,7 +1827,7 @@ config COMPAT_FOR_U64_ALIGNMENT
config SYSVIPC_COMPAT
def_bool y
- depends on X86_64 && COMPAT && SYSVIPC
+ depends on COMPAT && SYSVIPC
endmenu
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2c518fbc52e..60a85768cfc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -382,14 +382,17 @@ config X86_OOSTORE
# P6_NOPs are a relatively minor optimization that require a family >=
# 6 processor, except that it is broken on certain VIA chips.
# Furthermore, AMD chips prefer a totally different sequence of NOPs
-# (which work on all CPUs). As a result, disallow these if we're
-# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
-# x86-64 capable chips); the list of processors in the right-hand clause
-# are the cores that benefit from this optimization.
+# (which work on all CPUs). In addition, it looks like Virtual PC
+# does not understand them.
+#
+# As a result, disallow these if we're not compiling for X86_64 (these
+# NOPs do work on all x86-64 capable chips); the list of processors in
+# the right-hand clause are the cores that benefit from this optimization.
#
config X86_P6_NOP
def_bool y
- depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC)
+ depends on X86_64
+ depends on (MCORE2 || MPENTIUM4 || MPSC)
config X86_TSC
def_bool y
@@ -415,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+config X86_DS
+ bool "Debug Store support"
+ default y
+ help
+ Add support for Debug Store.
+ This allows the kernel to provide a memory buffer to the hardware
+ to store various profiling and tracing events.
+
+config X86_PTRACE_BTS
+ bool "ptrace interface to Branch Trace Store"
+ default y
+ depends on (X86_DS && X86_DEBUGCTLMSR)
+ help
+ Add a ptrace interface to allow collecting an execution trace
+ of the traced task.
+ This collects control flow changes in a (cyclic) buffer and allows
+ debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index a34b9982c7c..cc0ef13fba7 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -24,10 +24,14 @@
#include <linux/edd.h>
#include <asm/boot.h>
#include <asm/setup.h>
+#include "bitops.h"
+#include <asm/cpufeature.h>
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
extern struct setup_header hdr;
extern struct boot_params boot_params;
@@ -242,6 +246,12 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
/* cpu.c, cpucheck.c */
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int model;
+ u32 flags[NCAPINTS];
+};
+extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ba7736cf2ec..29c5fbf0839 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -137,14 +137,15 @@ relocated:
*/
movl output_len(%ebx), %eax
pushl %eax
+ # push arguments for decompress_kernel:
pushl %ebp # output address
movl input_len(%ebx), %eax
pushl %eax # input_len
leal input_data(%ebx), %eax
pushl %eax # input_data
leal boot_heap(%ebx), %eax
- pushl %eax # heap area as third argument
- pushl %esi # real mode pointer as second arg
+ pushl %eax # heap area
+ pushl %esi # real mode pointer
call decompress_kernel
addl $20, %esp
popl %ecx
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index aaf5a2131ef..5780d361105 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -27,7 +27,7 @@
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
@@ -251,7 +251,7 @@ static void __putstr(int error, const char *s)
y--;
}
} else {
- vidmem [(x + cols * y) * 2] = c;
+ vidmem[(x + cols * y) * 2] = c;
if (++x >= cols) {
x = 0;
if (++y >= lines) {
@@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n)
int i;
char *ss = s;
- for (i = 0; i < n; i++) ss[i] = c;
+ for (i = 0; i < n; i++)
+ ss[i] = c;
return s;
}
@@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n)
const char *s = src;
char *d = dest;
- for (i = 0; i < n; i++) d[i] = s[i];
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
return dest;
}
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c52fc0..857e492c571 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
continue;
}
sh_symtab = sec_symtab->symtab;
- sym_strtab = sec->link->strtab;
+ sym_strtab = sec_symtab->link->strtab;
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
Elf32_Rel *rel;
Elf32_Sym *sym;
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 92d6fd73dc7..75298fe2edc 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -16,9 +16,6 @@
*/
#include "boot.h"
-#include "bitops.h"
-#include <asm/cpufeature.h>
-
#include "cpustr.h"
static char *cpu_name(int level)
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 7804389ee00..4d3ff037201 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -22,21 +22,13 @@
#ifdef _SETUP
# include "boot.h"
-# include "bitops.h"
#endif
#include <linux/types.h>
-#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
-struct cpu_features {
- int level; /* Family, or 64 for x86-64 */
- int model;
- u32 flags[NCAPINTS];
-};
-
-static struct cpu_features cpu;
+struct cpu_features cpu;
static u32 cpu_vendor[3];
static u32 err_flags[NCAPINTS];
@@ -46,12 +38,12 @@ static const u32 req_flags[NCAPINTS] =
{
REQUIRED_MASK0,
REQUIRED_MASK1,
- REQUIRED_MASK2,
- REQUIRED_MASK3,
+ 0, /* REQUIRED_MASK2 not implemented in this file */
+ 0, /* REQUIRED_MASK3 not implemented in this file */
REQUIRED_MASK4,
- REQUIRED_MASK5,
+ 0, /* REQUIRED_MASK5 not implemented in this file */
REQUIRED_MASK6,
- REQUIRED_MASK7,
+ 0, /* REQUIRED_MASK7 not implemented in this file */
};
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index af86e431acf..b993062e9a5 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -30,7 +30,6 @@ SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
/* to be loaded */
ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
-SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 2296164b54d..197421db1af 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -73,6 +73,11 @@ static void keyboard_set_repeat(void)
*/
static void query_ist(void)
{
+ /* Some older BIOSes apparently crash on this call, so filter
+ it from machines too old to have SpeedStep at all. */
+ if (cpu.level < 6)
+ return;
+
asm("int $0x15"
: "=a" (boot_params.ist_info.signature),
"=b" (boot_params.ist_info.command),
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 53165c97336..8c3c25f3557 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,7 +13,6 @@
*/
#include "boot.h"
-#include <linux/kernel.h>
#define SMAP 0x534d4150 /* ASCII "SMAP" */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 4d73f53287b..ef9a52005ec 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,13 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.26-rc1
-# Sun May 4 19:59:02 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep 3 17:23:09 2008
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
-CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig"
+CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
@@ -53,6 +53,7 @@ CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# General setup
@@ -82,6 +83,7 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_NS=y
# CONFIG_CGROUP_DEVICE is not set
CONFIG_CPUSETS=y
+CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
@@ -105,7 +107,6 @@ CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
@@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
+CONFIG_PCSPKR_PLATFORM=y
# CONFIG_COMPAT_BRK is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
@@ -132,27 +134,35 @@ CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_KPROBES=y
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_KRETPROBES=y
+CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
+# CONFIG_HAVE_ARCH_TRACEHOOK is not set
# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_CLK is not set
CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
+CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
CONFIG_BLK_DEV_IO_TRACE=y
# CONFIG_LSF is not set
CONFIG_BLK_DEV_BSG=y
+# CONFIG_BLK_DEV_INTEGRITY is not set
#
# IO Schedulers
@@ -176,25 +186,23 @@ CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
-# CONFIG_X86_NUMAQ is not set
-# CONFIG_X86_SUMMIT is not set
-# CONFIG_X86_BIGSMP is not set
-# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_ES7000 is not set
-# CONFIG_X86_RDC321X is not set
# CONFIG_X86_VSMP is not set
+# CONFIG_X86_RDC321X is not set
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
+# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
@@ -213,30 +221,30 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
-CONFIG_MCORE2=y
+# CONFIG_MCORE2 is not set
# CONFIG_GENERIC_CPU is not set
-# CONFIG_X86_GENERIC is not set
+CONFIG_X86_GENERIC=y
CONFIG_X86_CPU=y
CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
CONFIG_X86_XADD=y
+# CONFIG_X86_PPRO_FENCE is not set
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
-CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_P6_NOP=y
CONFIG_X86_TSC=y
-CONFIG_X86_MINIMUM_CPU_FAMILY=6
+CONFIG_X86_CMOV=y
+CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_IOMMU_HELPER is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
@@ -247,8 +255,9 @@ CONFIG_X86_IO_APIC=y
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
-# CONFIG_X86_REBOOTFIXUPS is not set
-# CONFIG_MICROCODE is not set
+CONFIG_X86_REBOOTFIXUPS=y
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
# CONFIG_NOHIGHMEM is not set
@@ -256,32 +265,28 @@ CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
-CONFIG_NEED_NODE_MEMMAP_SIZE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
+CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
-CONFIG_SPARSEMEM_MANUAL=y
-CONFIG_SPARSEMEM=y
-CONFIG_HAVE_MEMORY_PRESENT=y
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
-
-#
-# Memory hotplug is currently incompatible with Software Suspend
-#
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
-# CONFIG_HIGHPTE is not set
+CONFIG_HIGHPTE=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
-# CONFIG_X86_PAT is not set
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_X86_PAT=y
CONFIG_EFI=y
# CONFIG_IRQBALANCE is not set
CONFIG_SECCOMP=y
@@ -293,6 +298,7 @@ CONFIG_HZ=1000
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
+# CONFIG_KEXEC_JUMP is not set
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
CONFIG_PHYSICAL_ALIGN=0x200000
@@ -312,6 +318,7 @@ CONFIG_PM_TRACE_RTC=y
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND=y
+# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_SUSPEND_FREEZER=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
@@ -337,6 +344,7 @@ CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
+# CONFIG_ACPI_PCI_SLOT is not set
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
@@ -395,8 +403,8 @@ CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
-CONFIG_PCI_GOANY=y
# CONFIG_PCI_GOOLPC is not set
+CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
@@ -448,10 +456,6 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y
-
-#
-# Networking
-#
CONFIG_NET=y
#
@@ -475,7 +479,10 @@ CONFIG_IP_FIB_HASH=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
CONFIG_IP_MROUTE=y
@@ -618,7 +625,6 @@ CONFIG_NET_SCHED=y
# CONFIG_NET_SCH_HTB is not set
# CONFIG_NET_SCH_HFSC is not set
# CONFIG_NET_SCH_PRIO is not set
-# CONFIG_NET_SCH_RR is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
@@ -680,28 +686,19 @@ CONFIG_FIB_RULES=y
CONFIG_CFG80211=y
CONFIG_NL80211=y
CONFIG_WIRELESS_EXT=y
+CONFIG_WIRELESS_EXT_SYSFS=y
CONFIG_MAC80211=y
#
# Rate control algorithm selection
#
+CONFIG_MAC80211_RC_PID=y
CONFIG_MAC80211_RC_DEFAULT_PID=y
-# CONFIG_MAC80211_RC_DEFAULT_NONE is not set
-
-#
-# Selecting 'y' for an algorithm will
-#
-
-#
-# build the algorithm into mac80211.
-#
CONFIG_MAC80211_RC_DEFAULT="pid"
-CONFIG_MAC80211_RC_PID=y
# CONFIG_MAC80211_MESH is not set
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_DEBUGFS is not set
-# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set
-# CONFIG_MAC80211_DEBUG is not set
+# CONFIG_MAC80211_DEBUG_MENU is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
@@ -717,6 +714,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
# CONFIG_DEBUG_DRIVER is not set
CONFIG_DEBUG_DEVRES=y
# CONFIG_SYS_HYPERVISOR is not set
@@ -749,6 +748,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384
# CONFIG_BLK_DEV_XIP is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_BLK_DEV_HD is not set
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
@@ -760,10 +760,12 @@ CONFIG_MISC_DEVICES=y
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_TC1100_WMI is not set
# CONFIG_MSI_LAPTOP is not set
+# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_INTEL_MENLOW is not set
# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_HP_ILO is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set
@@ -802,12 +804,13 @@ CONFIG_SCSI_WAIT_SCAN=m
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=y
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
# CONFIG_SCSI_LOWLEVEL is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
+# CONFIG_SCSI_DH is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
@@ -842,7 +845,7 @@ CONFIG_PATA_AMD=y
# CONFIG_PATA_CS5536 is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
-# CONFIG_ATA_GENERIC is not set
+CONFIG_ATA_GENERIC=y
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
@@ -852,7 +855,7 @@ CONFIG_PATA_AMD=y
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_MARVELL is not set
-# CONFIG_PATA_MPIIX is not set
+CONFIG_PATA_MPIIX=y
CONFIG_PATA_OLDPIIX=y
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
@@ -871,6 +874,7 @@ CONFIG_PATA_OLDPIIX=y
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
+CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_LINEAR is not set
@@ -894,13 +898,16 @@ CONFIG_DM_ZERO=y
#
# IEEE 1394 (FireWire) support
#
+
+#
+# Enable only one of the two stacks, unless you know what you are doing
+#
# CONFIG_FIREWIRE is not set
# CONFIG_IEEE1394 is not set
# CONFIG_I2O is not set
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
-# CONFIG_NETDEVICES_MULTIQUEUE is not set
# CONFIG_IFB is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
@@ -910,7 +917,23 @@ CONFIG_NETDEVICES=y
# CONFIG_VETH is not set
# CONFIG_NET_SB1000 is not set
# CONFIG_ARCNET is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+# CONFIG_SMSC_PHY is not set
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
@@ -943,10 +966,10 @@ CONFIG_FORCEDETH=y
CONFIG_E100=y
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
+CONFIG_NE2K_PCI=y
# CONFIG_8139CP is not set
CONFIG_8139TOO=y
-CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_PIO is not set
# CONFIG_8139TOO_TUNE_TWISTER is not set
# CONFIG_8139TOO_8129 is not set
# CONFIG_8139_OLD_RX_RESET is not set
@@ -961,25 +984,24 @@ CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
-# CONFIG_E1000E is not set
-# CONFIG_E1000E_ENABLED is not set
+CONFIG_E1000E=y
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
+CONFIG_R8169=y
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
CONFIG_SKY2=y
# CONFIG_SKY2_DEBUG is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=y
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
+# CONFIG_ATL1E is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
# CONFIG_CHELSIO_T3 is not set
@@ -1019,13 +1041,14 @@ CONFIG_WLAN_80211=y
# CONFIG_RTL8180 is not set
# CONFIG_RTL8187 is not set
# CONFIG_ADM8211 is not set
+# CONFIG_MAC80211_HWSIM is not set
# CONFIG_P54_COMMON is not set
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
-# CONFIG_IWLWIFI is not set
+# CONFIG_ATH9K is not set
# CONFIG_IWLCORE is not set
# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWL4965 is not set
+# CONFIG_IWLAGN is not set
# CONFIG_IWL3945 is not set
# CONFIG_HOSTAP is not set
# CONFIG_B43 is not set
@@ -1105,6 +1128,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
+# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_VSXXXAA is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
@@ -1139,12 +1163,14 @@ CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_UCB1400 is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
@@ -1173,6 +1199,7 @@ CONFIG_SERIO_LIBPS2=y
# Character devices
#
CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
@@ -1223,8 +1250,8 @@ CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
-# CONFIG_HW_RANDOM_INTEL is not set
-# CONFIG_HW_RANDOM_AMD is not set
+CONFIG_HW_RANDOM_INTEL=y
+CONFIG_HW_RANDOM_AMD=y
CONFIG_HW_RANDOM_GEODE=y
CONFIG_HW_RANDOM_VIA=y
CONFIG_NVRAM=y
@@ -1245,7 +1272,6 @@ CONFIG_NVRAM=y
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
-# CONFIG_HPET_RTC_IRQ is not set
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
@@ -1254,43 +1280,64 @@ CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
+CONFIG_I2C_HELPER_AUTO=y
#
# I2C Hardware Bus support
#
+
+#
+# PC SMBus host controller drivers
+#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
CONFIG_I2C_I801=y
-# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIMTEC is not set
-# CONFIG_SCx200_ACB is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_TAOS_EVM is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_TINY_USB is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Graphics adapter I2C/DDC channel drivers
+#
# CONFIG_I2C_VOODOO3 is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_SCx200_ACB is not set
#
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
+# CONFIG_AT24 is not set
# CONFIG_SENSORS_EEPROM is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_SENSORS_TSL2550 is not set
@@ -1299,6 +1346,8 @@ CONFIG_I2C_I801=y
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set
# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
@@ -1360,8 +1409,10 @@ CONFIG_SSB_POSSIBLE=y
#
# Multifunction device drivers
#
+# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
#
# Multimedia devices
@@ -1372,6 +1423,7 @@ CONFIG_SSB_POSSIBLE=y
#
# CONFIG_VIDEO_DEV is not set
# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
#
# Multimedia drivers
@@ -1418,7 +1470,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_SYS_IMAGEBLIT is not set
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_SYS_FOPS is not set
-CONFIG_FB_DEFERRED_IO=y
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
@@ -1463,6 +1514,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
+# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
@@ -1470,6 +1522,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_CORGI is not set
# CONFIG_BACKLIGHT_PROGEAR is not set
+# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
#
# Display device support
@@ -1489,15 +1542,7 @@ CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
-
-#
-# Sound
-#
CONFIG_SOUND=y
-
-#
-# Advanced Linux Sound Architecture
-#
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
@@ -1515,20 +1560,14 @@ CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
CONFIG_SND_VMASTER=y
-
-#
-# Generic devices
-#
+CONFIG_SND_DRIVERS=y
# CONFIG_SND_PCSP is not set
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
+CONFIG_SND_PCI=y
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS300 is not set
# CONFIG_SND_ALS4000 is not set
@@ -1603,36 +1642,14 @@ CONFIG_SND_HDA_GENERIC=y
# CONFIG_SND_VIRTUOSO is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
+CONFIG_SND_USB=y
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
-
-#
-# PCMCIA devices
-#
+CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
-
-#
-# System on Chip audio support
-#
# CONFIG_SND_SOC is not set
-
-#
-# ALSA SoC audio for Freescale SOCs
-#
-
-#
-# SoC Audio for the Texas Instruments OMAP
-#
-
-#
-# Open Sound System
-#
# CONFIG_SOUND_PRIME is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
@@ -1668,6 +1685,7 @@ CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DYNAMIC_MINORS is not set
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
+CONFIG_USB_MON=y
#
# USB Host Controller Drivers
@@ -1691,6 +1709,7 @@ CONFIG_USB_UHCI_HCD=y
#
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
+# CONFIG_USB_WDM is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1712,6 +1731,7 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_ALAUDA is not set
# CONFIG_USB_STORAGE_ONETOUCH is not set
# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_SIERRA is not set
# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
CONFIG_USB_LIBUSUAL=y
@@ -1720,7 +1740,6 @@ CONFIG_USB_LIBUSUAL=y
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-CONFIG_USB_MON=y
#
# USB port drivers
@@ -1733,7 +1752,6 @@ CONFIG_USB_MON=y
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
-# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
@@ -1750,6 +1768,7 @@ CONFIG_USB_MON=y
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
+# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_GADGET is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
@@ -1759,7 +1778,9 @@ CONFIG_LEDS_CLASS=y
#
# LED drivers
#
+# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
+# CONFIG_LEDS_PCA955X is not set
#
# LED Triggers
@@ -1805,6 +1826,7 @@ CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
#
# SPI RTC drivers
@@ -1837,11 +1859,13 @@ CONFIG_DMADEVICES=y
# Firmware Drivers
#
# CONFIG_EDD is not set
+CONFIG_FIRMWARE_MEMMAP=y
CONFIG_EFI_VARS=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
-# CONFIG_ISCSI_IBFT_FIND is not set
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_ISCSI_IBFT=y
#
# File systems
@@ -1920,14 +1944,27 @@ CONFIG_HUGETLB_PAGE=y
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
-# CONFIG_NFS_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
@@ -2001,9 +2038,9 @@ CONFIG_NLS_UTF8=y
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
@@ -2033,6 +2070,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_WRITECOUNT is not set
+CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
CONFIG_FRAME_POINTER=y
@@ -2043,23 +2081,32 @@ CONFIG_FRAME_POINTER=y
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SYSPROF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
# CONFIG_SAMPLES is not set
-# CONFIG_KGDB is not set
CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
# CONFIG_STRICT_DEVMEM is not set
+CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_PER_CPU_MAPS is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
# CONFIG_4KSTACKS is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
-CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y
+# CONFIG_MMIOTRACE is not set
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2071,6 +2118,7 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
+CONFIG_OPTIMIZE_INLINING=y
#
# Security options
@@ -2080,7 +2128,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
-CONFIG_SECURITY_CAPABILITIES=y
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
@@ -2141,6 +2188,10 @@ CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
@@ -2151,7 +2202,7 @@ CONFIG_CRYPTO_SHA1=y
# Ciphers
#
CONFIG_CRYPTO_AES=y
-# CONFIG_CRYPTO_AES_586 is not set
+CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANUBIS is not set
CONFIG_CRYPTO_ARC4=y
# CONFIG_CRYPTO_BLOWFISH is not set
@@ -2193,6 +2244,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index a4045242962..e620ea6e2a7 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,13 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.26-rc1
-# Sun May 4 19:59:57 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep 3 17:13:39 2008
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
CONFIG_X86_64=y
CONFIG_X86=y
-CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig"
+CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
@@ -53,6 +53,7 @@ CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
# CONFIG_KTIME_SCALAR is not set
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# General setup
@@ -82,6 +83,7 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_NS=y
# CONFIG_CGROUP_DEVICE is not set
CONFIG_CPUSETS=y
+CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
@@ -105,7 +107,6 @@ CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_EXTRA_PASS=y
@@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
+CONFIG_PCSPKR_PLATFORM=y
# CONFIG_COMPAT_BRK is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
@@ -132,25 +134,33 @@ CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_KPROBES=y
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_KRETPROBES=y
+CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
+# CONFIG_HAVE_ARCH_TRACEHOOK is not set
# CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_CLK is not set
CONFIG_PROC_PAGE_MONITOR=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
+CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BLK_DEV_BSG=y
+# CONFIG_BLK_DEV_INTEGRITY is not set
CONFIG_BLOCK_COMPAT=y
#
@@ -175,20 +185,15 @@ CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
-# CONFIG_X86_NUMAQ is not set
-# CONFIG_X86_SUMMIT is not set
-# CONFIG_X86_BIGSMP is not set
-# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_ES7000 is not set
-# CONFIG_X86_RDC321X is not set
# CONFIG_X86_VSMP is not set
# CONFIG_PARAVIRT_GUEST is not set
-CONFIG_MEMTEST_BOOTPARAM=y
-CONFIG_MEMTEST_BOOTPARAM_VALUE=0
+# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
@@ -213,18 +218,16 @@ CONFIG_MEMTEST_BOOTPARAM_VALUE=0
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_MPSC is not set
-CONFIG_MCORE2=y
-# CONFIG_GENERIC_CPU is not set
+# CONFIG_MCORE2 is not set
+CONFIG_GENERIC_CPU=y
CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_BYTES=128
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
-CONFIG_X86_GOOD_APIC=y
-CONFIG_X86_INTEL_USERCOPY=y
-CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_P6_NOP=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_TSC=y
+CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
@@ -234,10 +237,11 @@ CONFIG_DMI=y
CONFIG_GART_IOMMU=y
CONFIG_CALGARY_IOMMU=y
CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
+CONFIG_AMD_IOMMU=y
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
@@ -246,7 +250,8 @@ CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
# CONFIG_X86_MCE is not set
# CONFIG_I8K is not set
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_NUMA=y
@@ -281,7 +286,8 @@ CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_MTRR=y
-# CONFIG_X86_PAT is not set
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_X86_PAT=y
CONFIG_EFI=y
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
@@ -313,6 +319,7 @@ CONFIG_PM_TRACE_RTC=y
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND=y
+# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_SUSPEND_FREEZER=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
@@ -339,6 +346,7 @@ CONFIG_ACPI_NUMA=y
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
+# CONFIG_ACPI_PCI_SLOT is not set
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
@@ -437,10 +445,6 @@ CONFIG_IA32_EMULATION=y
CONFIG_COMPAT=y
CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Networking
-#
CONFIG_NET=y
#
@@ -464,7 +468,10 @@ CONFIG_IP_FIB_HASH=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
CONFIG_IP_MROUTE=y
@@ -607,7 +614,6 @@ CONFIG_NET_SCHED=y
# CONFIG_NET_SCH_HTB is not set
# CONFIG_NET_SCH_HFSC is not set
# CONFIG_NET_SCH_PRIO is not set
-# CONFIG_NET_SCH_RR is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
@@ -669,28 +675,19 @@ CONFIG_FIB_RULES=y
CONFIG_CFG80211=y
CONFIG_NL80211=y
CONFIG_WIRELESS_EXT=y
+CONFIG_WIRELESS_EXT_SYSFS=y
CONFIG_MAC80211=y
#
# Rate control algorithm selection
#
+CONFIG_MAC80211_RC_PID=y
CONFIG_MAC80211_RC_DEFAULT_PID=y
-# CONFIG_MAC80211_RC_DEFAULT_NONE is not set
-
-#
-# Selecting 'y' for an algorithm will
-#
-
-#
-# build the algorithm into mac80211.
-#
CONFIG_MAC80211_RC_DEFAULT="pid"
-CONFIG_MAC80211_RC_PID=y
# CONFIG_MAC80211_MESH is not set
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_DEBUGFS is not set
-# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set
-# CONFIG_MAC80211_DEBUG is not set
+# CONFIG_MAC80211_DEBUG_MENU is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
@@ -706,6 +703,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
# CONFIG_DEBUG_DRIVER is not set
CONFIG_DEBUG_DEVRES=y
# CONFIG_SYS_HYPERVISOR is not set
@@ -738,6 +737,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384
# CONFIG_BLK_DEV_XIP is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_BLK_DEV_HD is not set
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
@@ -748,10 +748,14 @@ CONFIG_MISC_DEVICES=y
# CONFIG_ASUS_LAPTOP is not set
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_MSI_LAPTOP is not set
+# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_INTEL_MENLOW is not set
# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_SGI_XP is not set
+# CONFIG_HP_ILO is not set
+# CONFIG_SGI_GRU is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set
@@ -790,12 +794,13 @@ CONFIG_SCSI_WAIT_SCAN=m
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=y
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
# CONFIG_SCSI_LOWLEVEL is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
+# CONFIG_SCSI_DH is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
@@ -857,6 +862,7 @@ CONFIG_PATA_OLDPIIX=y
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
+CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_LINEAR is not set
@@ -880,13 +886,16 @@ CONFIG_DM_ZERO=y
#
# IEEE 1394 (FireWire) support
#
+
+#
+# Enable only one of the two stacks, unless you know what you are doing
+#
# CONFIG_FIREWIRE is not set
# CONFIG_IEEE1394 is not set
# CONFIG_I2O is not set
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
-# CONFIG_NETDEVICES_MULTIQUEUE is not set
# CONFIG_IFB is not set
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
@@ -896,7 +905,23 @@ CONFIG_NETDEVICES=y
# CONFIG_VETH is not set
# CONFIG_NET_SB1000 is not set
# CONFIG_ARCNET is not set
-# CONFIG_PHYLIB is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+# CONFIG_SMSC_PHY is not set
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
@@ -940,16 +965,15 @@ CONFIG_8139TOO_PIO=y
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
# CONFIG_E1000E is not set
-# CONFIG_E1000E_ENABLED is not set
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
# CONFIG_NS83820 is not set
@@ -965,6 +989,7 @@ CONFIG_TIGON3=y
# CONFIG_BNX2 is not set
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
+# CONFIG_ATL1E is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
# CONFIG_CHELSIO_T3 is not set
@@ -1003,13 +1028,14 @@ CONFIG_WLAN_80211=y
# CONFIG_RTL8180 is not set
# CONFIG_RTL8187 is not set
# CONFIG_ADM8211 is not set
+# CONFIG_MAC80211_HWSIM is not set
# CONFIG_P54_COMMON is not set
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
-# CONFIG_IWLWIFI is not set
+# CONFIG_ATH9K is not set
# CONFIG_IWLCORE is not set
# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWL4965 is not set
+# CONFIG_IWLAGN is not set
# CONFIG_IWL3945 is not set
# CONFIG_HOSTAP is not set
# CONFIG_B43 is not set
@@ -1088,6 +1114,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
+# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_VSXXXAA is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
@@ -1122,12 +1149,14 @@ CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_UCB1400 is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
@@ -1155,6 +1184,7 @@ CONFIG_SERIO_LIBPS2=y
# Character devices
#
CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
@@ -1222,7 +1252,6 @@ CONFIG_NVRAM=y
# CONFIG_PC8736x_GPIO is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
-# CONFIG_HPET_RTC_IRQ is not set
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
@@ -1231,42 +1260,63 @@ CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
+CONFIG_I2C_HELPER_AUTO=y
#
# I2C Hardware Bus support
#
+
+#
+# PC SMBus host controller drivers
+#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
CONFIG_I2C_I801=y
-# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_PIIX4 is not set
# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIMTEC is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_TAOS_EVM is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_TINY_USB is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Graphics adapter I2C/DDC channel drivers
+#
# CONFIG_I2C_VOODOO3 is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
#
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
+# CONFIG_AT24 is not set
# CONFIG_SENSORS_EEPROM is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_SENSORS_TSL2550 is not set
@@ -1275,6 +1325,8 @@ CONFIG_I2C_I801=y
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set
# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
@@ -1335,8 +1387,10 @@ CONFIG_SSB_POSSIBLE=y
#
# Multifunction device drivers
#
+# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
#
# Multimedia devices
@@ -1347,6 +1401,7 @@ CONFIG_SSB_POSSIBLE=y
#
# CONFIG_VIDEO_DEV is not set
# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
#
# Multimedia drivers
@@ -1387,7 +1442,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_SYS_IMAGEBLIT is not set
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_SYS_FOPS is not set
-CONFIG_FB_DEFERRED_IO=y
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
@@ -1430,6 +1484,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
+# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
@@ -1437,6 +1492,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_CORGI is not set
# CONFIG_BACKLIGHT_PROGEAR is not set
+# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
#
# Display device support
@@ -1456,15 +1512,7 @@ CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
-
-#
-# Sound
-#
CONFIG_SOUND=y
-
-#
-# Advanced Linux Sound Architecture
-#
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
@@ -1482,20 +1530,14 @@ CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
CONFIG_SND_VMASTER=y
-
-#
-# Generic devices
-#
+CONFIG_SND_DRIVERS=y
# CONFIG_SND_PCSP is not set
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
+CONFIG_SND_PCI=y
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS300 is not set
# CONFIG_SND_ALS4000 is not set
@@ -1568,36 +1610,14 @@ CONFIG_SND_HDA_GENERIC=y
# CONFIG_SND_VIRTUOSO is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
+CONFIG_SND_USB=y
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
-
-#
-# PCMCIA devices
-#
+CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
-
-#
-# System on Chip audio support
-#
# CONFIG_SND_SOC is not set
-
-#
-# ALSA SoC audio for Freescale SOCs
-#
-
-#
-# SoC Audio for the Texas Instruments OMAP
-#
-
-#
-# Open Sound System
-#
# CONFIG_SOUND_PRIME is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
@@ -1633,6 +1653,7 @@ CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DYNAMIC_MINORS is not set
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
+CONFIG_USB_MON=y
#
# USB Host Controller Drivers
@@ -1656,6 +1677,7 @@ CONFIG_USB_UHCI_HCD=y
#
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
+# CONFIG_USB_WDM is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1677,6 +1699,7 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_ALAUDA is not set
# CONFIG_USB_STORAGE_ONETOUCH is not set
# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_SIERRA is not set
# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
CONFIG_USB_LIBUSUAL=y
@@ -1685,7 +1708,6 @@ CONFIG_USB_LIBUSUAL=y
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-CONFIG_USB_MON=y
#
# USB port drivers
@@ -1698,7 +1720,6 @@ CONFIG_USB_MON=y
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
-# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
@@ -1715,6 +1736,7 @@ CONFIG_USB_MON=y
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
+# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_GADGET is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
@@ -1724,7 +1746,9 @@ CONFIG_LEDS_CLASS=y
#
# LED drivers
#
+# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
+# CONFIG_LEDS_PCA955X is not set
#
# LED Triggers
@@ -1770,6 +1794,7 @@ CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
#
# SPI RTC drivers
@@ -1802,11 +1827,13 @@ CONFIG_DMADEVICES=y
# Firmware Drivers
#
# CONFIG_EDD is not set
+CONFIG_FIRMWARE_MEMMAP=y
CONFIG_EFI_VARS=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
-# CONFIG_ISCSI_IBFT_FIND is not set
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_ISCSI_IBFT=y
#
# File systems
@@ -1886,14 +1913,27 @@ CONFIG_HUGETLB_PAGE=y
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
-# CONFIG_NFS_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
@@ -1967,9 +2007,9 @@ CONFIG_NLS_UTF8=y
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
@@ -1998,6 +2038,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_WRITECOUNT is not set
+CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
CONFIG_FRAME_POINTER=y
@@ -2008,11 +2049,20 @@ CONFIG_FRAME_POINTER=y
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SYSPROF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
# CONFIG_SAMPLES is not set
-# CONFIG_KGDB is not set
CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
# CONFIG_STRICT_DEVMEM is not set
+CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
@@ -2023,8 +2073,8 @@ CONFIG_DEBUG_RODATA=y
# CONFIG_DIRECT_GBPAGES is not set
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
-CONFIG_X86_MPPARSE=y
# CONFIG_IOMMU_DEBUG is not set
+# CONFIG_MMIOTRACE is not set
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2036,6 +2086,7 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
+CONFIG_OPTIMIZE_INLINING=y
#
# Security options
@@ -2045,7 +2096,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
-CONFIG_SECURITY_CAPABILITIES=y
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
@@ -2106,6 +2156,10 @@ CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
@@ -2155,6 +2209,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a0e1dbe67dc..127ec3f0721 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -85,8 +85,10 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
dump->regs.ax = regs->ax;
dump->regs.ds = current->thread.ds;
dump->regs.es = current->thread.es;
- asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
- asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+ savesegment(fs, fs);
+ dump->regs.fs = fs;
+ savesegment(gs, gs);
+ dump->regs.gs = gs;
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@ beyond_if:
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
/* start thread */
- asm volatile("movl %0,%%fs" :: "r" (0)); \
- asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+ loadsegment(fs, 0);
+ loadsegment(ds, __USER32_DS);
+ loadsegment(es, __USER32_DS);
load_gs_index(0);
(regs)->ip = ex.a_entry;
(regs)->sp = current->mm->start_stack;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c79579..f1a2ac777fa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -206,7 +206,7 @@ struct rt_sigframe
{ unsigned int cur; \
unsigned short pre; \
err |= __get_user(pre, &sc->seg); \
- asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
+ savesegment(seg, cur); \
pre |= mask; \
if (pre != cur) loadsegment(seg, pre); }
@@ -235,7 +235,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
*/
err |= __get_user(gs, &sc->gs);
gs |= 3;
- asm("movl %%gs,%0" : "=r" (oldgs));
+ savesegment(gs, oldgs);
if (gs != oldgs)
load_gs_index(gs);
@@ -355,14 +355,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
{
int tmp, err = 0;
- tmp = 0;
- __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(fs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
- __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(ds, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
- __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(es, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->es);
err |= __put_user((u32)regs->di, &sc->di);
@@ -498,8 +497,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
regs->dx = 0;
regs->cx = 0;
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+ loadsegment(ds, __USER32_DS);
+ loadsegment(es, __USER32_DS);
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
@@ -591,8 +590,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
- asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+ loadsegment(ds, __USER32_DS);
+ loadsegment(es, __USER32_DS);
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d3c64088b98..beda4232ce6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -556,15 +556,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
return ret;
}
-/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long sys32_pause(void)
-{
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- return -ERESTARTNOHAND;
-}
-
-
#ifdef CONFIG_SYSCTL_SYSCALL
struct sysctl_ia32 {
unsigned int name;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d9770a56511..c2ac1b4515a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
#include <asm/proto.h>
-#include <asm/genapic.h>
#else /* X86 */
@@ -158,6 +157,16 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
struct acpi_mcfg_allocation *pci_mmcfg_config;
int pci_mmcfg_config_num;
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+ if (!strcmp(mcfg->header.oem_id, "SGI"))
+ acpi_mcfg_64bit_base_addr = TRUE;
+
+ return 0;
+}
+
int __init acpi_parse_mcfg(struct acpi_table_header *header)
{
struct acpi_table_mcfg *mcfg;
@@ -190,8 +199,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header)
}
memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+ acpi_mcfg_oem_check(mcfg);
+
for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
+ if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+ !acpi_mcfg_64bit_base_addr) {
printk(KERN_ERR PREFIX
"MMCONFIG not in low 4GB of memory\n");
kfree(pci_mmcfg_config);
@@ -1589,6 +1602,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
*/
{
.callback = dmi_ignore_irq0_timer_override,
+ .ident = "HP nx6115 laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
+ },
+ },
+ {
+ .callback = dmi_ignore_irq0_timer_override,
.ident = "HP NX6125 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1603,6 +1624,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
},
},
+ {
+ .callback = dmi_ignore_irq0_timer_override,
+ .ident = "HP 6715b laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
+ },
+ },
{}
};
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fa2161d5003..426e5d91b63 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags;
/* address in low memory of the wakeup routine. */
static unsigned long acpi_realmode;
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
static char temp_stack[10240];
#endif
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void)
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
- header->pmode_cr4 = read_cr4();
+ header->pmode_cr4 = read_cr4_safe();
header->realmode_flags = acpi_realmode_flags;
header->real_magic = 0x12345678;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb37b55..fb04e49776b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
{
- return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return k8_nops;
}
#else /* CONFIG_X86_64 */
-static const struct nop {
- int cpuid;
- const unsigned char *const *noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { X86_FEATURE_P4, p6_nops },
- { X86_FEATURE_P3, p6_nops },
- { -1, NULL }
-};
-
const unsigned char *const *find_nop_table(void)
{
- const unsigned char *const *noptable = intel_nops;
- int i;
-
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- return noptable;
+ if (boot_cpu_has(X86_FEATURE_K8))
+ return k8_nops;
+ else if (boot_cpu_has(X86_FEATURE_K7))
+ return k7_nops;
+ else if (boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return intel_nops;
}
#endif /* CONFIG_X86_64 */
@@ -241,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
continue;
if (*ptr > text_end)
continue;
- text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
+ /* turn DS segment override prefix into lock prefix */
+ text_poke(*ptr, ((unsigned char []){0xf0}), 1);
};
}
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
- char insn[1];
if (noreplace_smp)
return;
- add_nops(insn, 1);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
if (*ptr > text_end)
continue;
- text_poke(*ptr, insn, 1);
+ /* turn lock prefix into DS segment override prefix */
+ text_poke(*ptr, ((unsigned char []){0x3E}), 1);
};
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 22d7d050905..34e4d112b1e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -33,6 +33,10 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+/* A list of preallocated protection domains */
+static LIST_HEAD(iommu_pd_list);
+static DEFINE_SPINLOCK(iommu_pd_list_lock);
+
/*
* general struct to manage commands send to an IOMMU
*/
@@ -51,6 +55,102 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
/****************************************************************************
*
+ * Interrupt handling functions
+ *
+ ****************************************************************************/
+
+static void iommu_print_event(void *__evt)
+{
+ u32 *event = __evt;
+ int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
+ int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+ int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
+ int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+ u64 address = (u64)(((u64)event[3]) << 32) | event[2];
+
+ printk(KERN_ERR "AMD IOMMU: Event logged [");
+
+ switch (type) {
+ case EVENT_TYPE_ILL_DEV:
+ printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+ "address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ address, flags);
+ break;
+ case EVENT_TYPE_IO_FAULT:
+ printk("IO_PAGE_FAULT device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ domid, address, flags);
+ break;
+ case EVENT_TYPE_DEV_TAB_ERR:
+ printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ address, flags);
+ break;
+ case EVENT_TYPE_PAGE_TAB_ERR:
+ printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+ "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ domid, address, flags);
+ break;
+ case EVENT_TYPE_ILL_CMD:
+ printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ break;
+ case EVENT_TYPE_CMD_HARD_ERR:
+ printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
+ "flags=0x%04x]\n", address, flags);
+ break;
+ case EVENT_TYPE_IOTLB_INV_TO:
+ printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+ "address=0x%016llx]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ address);
+ break;
+ case EVENT_TYPE_INV_DEV_REQ:
+ printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+ "address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ address, flags);
+ break;
+ default:
+ printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
+ }
+}
+
+static void iommu_poll_events(struct amd_iommu *iommu)
+{
+ u32 head, tail;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
+
+ while (head != tail) {
+ iommu_print_event(iommu->evt_buf + head);
+ head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
+ }
+
+ writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+irqreturn_t amd_iommu_int_handler(int irq, void *data)
+{
+ struct amd_iommu *iommu;
+
+ list_for_each_entry(iommu, &amd_iommu_list, list)
+ iommu_poll_events(iommu);
+
+ return IRQ_HANDLED;
+}
+
+/****************************************************************************
+ *
* IOMMU command queuing functions
*
****************************************************************************/
@@ -65,7 +165,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
u8 *target;
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- target = (iommu->cmd_buf + tail);
+ target = iommu->cmd_buf + tail;
memcpy_toio(target, cmd, sizeof(*cmd));
tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
@@ -101,32 +201,39 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret;
+ int ret = 0, ready = 0;
+ unsigned status = 0;
struct iommu_cmd cmd;
- volatile u64 ready = 0;
- unsigned long ready_phys = virt_to_phys(&ready);
- unsigned long i = 0;
+ unsigned long flags, i = 0;
memset(&cmd, 0, sizeof(cmd));
- cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- cmd.data[1] = upper_32_bits(ready_phys);
- cmd.data[2] = 1; /* value written to 'ready' */
+ cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
iommu->need_sync = 0;
- ret = iommu_queue_command(iommu, &cmd);
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ ret = __iommu_queue_command(iommu, &cmd);
if (ret)
- return ret;
+ goto out;
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
- cpu_relax();
+ /* wait for the bit to become one */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
+ /* set bit back to zero */
+ status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+ writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+out:
+ spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -137,6 +244,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
{
struct iommu_cmd cmd;
+ int ret;
BUG_ON(iommu == NULL);
@@ -144,9 +252,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
cmd.data[0] = devid;
+ ret = iommu_queue_command(iommu, &cmd);
+
iommu->need_sync = 1;
- return iommu_queue_command(iommu, &cmd);
+ return ret;
}
/*
@@ -156,21 +266,24 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
u64 address, u16 domid, int pde, int s)
{
struct iommu_cmd cmd;
+ int ret;
memset(&cmd, 0, sizeof(cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
- cmd.data[2] = LOW_U32(address);
+ cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ ret = iommu_queue_command(iommu, &cmd);
+
iommu->need_sync = 1;
- return iommu_queue_command(iommu, &cmd);
+ return ret;
}
/*
@@ -200,6 +313,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
return 0;
}
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+{
+ u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+ iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+}
+
/****************************************************************************
*
* The functions below are used the create the page table mappings for
@@ -359,11 +480,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
* efficient allocator.
*
****************************************************************************/
-static unsigned long dma_mask_to_pages(unsigned long mask)
-{
- return (mask >> PAGE_SHIFT) +
- (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
-}
/*
* The address allocator core function.
@@ -372,25 +488,31 @@ static unsigned long dma_mask_to_pages(unsigned long mask)
*/
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
- unsigned int pages)
+ unsigned int pages,
+ unsigned long align_mask,
+ u64 dma_mask)
{
- unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
+ unsigned long limit;
unsigned long address;
- unsigned long size = dom->aperture_size >> PAGE_SHIFT;
unsigned long boundary_size;
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
- limit = limit < size ? limit : size;
+ limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
+ dma_mask >> PAGE_SHIFT);
- if (dom->next_bit >= limit)
+ if (dom->next_bit >= limit) {
dom->next_bit = 0;
+ dom->need_flush = true;
+ }
address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
- 0 , boundary_size, 0);
- if (address == -1)
+ 0 , boundary_size, align_mask);
+ if (address == -1) {
address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
- 0, boundary_size, 0);
+ 0, boundary_size, align_mask);
+ dom->need_flush = true;
+ }
if (likely(address != -1)) {
dom->next_bit = address + pages;
@@ -456,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
if (start_page + pages > last_page)
pages = last_page - start_page;
- set_bit_string(dom->bitmap, start_page, pages);
+ iommu_area_reserve(dom->bitmap, start_page, pages);
}
static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
@@ -550,6 +672,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
dma_dom->bitmap[0] = 1;
dma_dom->next_bit = 0;
+ dma_dom->need_flush = false;
+ dma_dom->target_dev = 0xffff;
+
/* Intialize the exclusion range if necessary */
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
@@ -620,12 +745,13 @@ static void set_device_domain(struct amd_iommu *iommu,
u64 pte_root = virt_to_phys(domain->pt_root);
- pte_root |= (domain->mode & 0x07) << 9;
- pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
+ pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+ << DEV_ENTRY_MODE_SHIFT;
+ pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- amd_iommu_dev_table[devid].data[0] = pte_root;
- amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
+ amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+ amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_pd_table[devid] = domain;
@@ -643,6 +769,45 @@ static void set_device_domain(struct amd_iommu *iommu,
*****************************************************************************/
/*
+ * This function checks if the driver got a valid device from the caller to
+ * avoid dereferencing invalid pointers.
+ */
+static bool check_device(struct device *dev)
+{
+ if (!dev || !dev->dma_mask)
+ return false;
+
+ return true;
+}
+
+/*
+ * In this function the list of preallocated protection domains is traversed to
+ * find the domain for a specific device
+ */
+static struct dma_ops_domain *find_protection_domain(u16 devid)
+{
+ struct dma_ops_domain *entry, *ret = NULL;
+ unsigned long flags;
+
+ if (list_empty(&iommu_pd_list))
+ return NULL;
+
+ spin_lock_irqsave(&iommu_pd_list_lock, flags);
+
+ list_for_each_entry(entry, &iommu_pd_list, list) {
+ if (entry->target_dev == devid) {
+ ret = entry;
+ list_del(&ret->list);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+ return ret;
+}
+
+/*
* In the dma_ops path we only have the struct device. This function
* finds the corresponding IOMMU, the protection domain and the
* requestor id for a given device.
@@ -658,27 +823,30 @@ static int get_device_resources(struct device *dev,
struct pci_dev *pcidev;
u16 _bdf;
- BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
+ *iommu = NULL;
+ *domain = NULL;
+ *bdf = 0xffff;
+
+ if (dev->bus != &pci_bus_type)
+ return 0;
pcidev = to_pci_dev(dev);
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
/* device not translated by any IOMMU in the system? */
- if (_bdf > amd_iommu_last_bdf) {
- *iommu = NULL;
- *domain = NULL;
- *bdf = 0xffff;
+ if (_bdf > amd_iommu_last_bdf)
return 0;
- }
*bdf = amd_iommu_alias_table[_bdf];
*iommu = amd_iommu_rlookup_table[*bdf];
if (*iommu == NULL)
return 0;
- dma_dom = (*iommu)->default_dom;
*domain = domain_for_device(*bdf);
if (*domain == NULL) {
+ dma_dom = find_protection_domain(*bdf);
+ if (!dma_dom)
+ dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
set_device_domain(*iommu, *domain, *bdf);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
@@ -757,17 +925,24 @@ static dma_addr_t __map_single(struct device *dev,
struct dma_ops_domain *dma_dom,
phys_addr_t paddr,
size_t size,
- int dir)
+ int dir,
+ bool align,
+ u64 dma_mask)
{
dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start;
unsigned int pages;
+ unsigned long align_mask = 0;
int i;
pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
- address = dma_ops_alloc_addresses(dev, dma_dom, pages);
+ if (align)
+ align_mask = (1UL << get_order(size)) - 1;
+
+ address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
+ dma_mask);
if (unlikely(address == bad_dma_address))
goto out;
@@ -779,6 +954,12 @@ static dma_addr_t __map_single(struct device *dev,
}
address += offset;
+ if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
+ iommu_flush_tlb(iommu, dma_dom->domain.id);
+ dma_dom->need_flush = false;
+ } else if (unlikely(iommu_has_npcache(iommu)))
+ iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
+
out:
return address;
}
@@ -809,6 +990,9 @@ static void __unmap_single(struct amd_iommu *iommu,
}
dma_ops_free_addresses(dma_dom, dma_addr, pages);
+
+ if (amd_iommu_unmap_flush)
+ iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
}
/*
@@ -822,6 +1006,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
struct protection_domain *domain;
u16 devid;
dma_addr_t addr;
+ u64 dma_mask;
+
+ if (!check_device(dev))
+ return bad_dma_address;
+
+ dma_mask = *dev->dma_mask;
get_device_resources(dev, &iommu, &domain, &devid);
@@ -830,14 +1020,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
return (dma_addr_t)paddr;
spin_lock_irqsave(&domain->lock, flags);
- addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
+ addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
+ dma_mask);
if (addr == bad_dma_address)
goto out;
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
@@ -857,7 +1045,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
struct protection_domain *domain;
u16 devid;
- if (!get_device_resources(dev, &iommu, &domain, &devid))
+ if (!check_device(dev) ||
+ !get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
@@ -865,9 +1054,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- iommu_flush_pages(iommu, domain->id, dma_addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -906,6 +1093,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
struct scatterlist *s;
phys_addr_t paddr;
int mapped_elems = 0;
+ u64 dma_mask;
+
+ if (!check_device(dev))
+ return 0;
+
+ dma_mask = *dev->dma_mask;
get_device_resources(dev, &iommu, &domain, &devid);
@@ -918,19 +1111,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
paddr = sg_phys(s);
s->dma_address = __map_single(dev, iommu, domain->priv,
- paddr, s->length, dir);
+ paddr, s->length, dir, false,
+ dma_mask);
if (s->dma_address) {
s->dma_length = s->length;
mapped_elems++;
} else
goto unmap;
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, s->dma_address,
- s->dma_length);
}
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
@@ -964,7 +1155,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
u16 devid;
int i;
- if (!get_device_resources(dev, &iommu, &domain, &devid))
+ if (!check_device(dev) ||
+ !get_device_resources(dev, &iommu, &domain, &devid))
return;
spin_lock_irqsave(&domain->lock, flags);
@@ -972,12 +1164,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
for_each_sg(sglist, s, nelems, i) {
__unmap_single(iommu, domain->priv, s->dma_address,
s->dma_length, dir);
- iommu_flush_pages(iommu, domain->id, s->dma_address,
- s->dma_length);
s->dma_address = s->dma_length = 0;
}
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -995,25 +1185,33 @@ static void *alloc_coherent(struct device *dev, size_t size,
struct protection_domain *domain;
u16 devid;
phys_addr_t paddr;
+ u64 dma_mask = dev->coherent_dma_mask;
+
+ if (!check_device(dev))
+ return NULL;
+
+ if (!get_device_resources(dev, &iommu, &domain, &devid))
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ flag |= __GFP_ZERO;
virt_addr = (void *)__get_free_pages(flag, get_order(size));
if (!virt_addr)
return 0;
- memset(virt_addr, 0, size);
paddr = virt_to_phys(virt_addr);
- get_device_resources(dev, &iommu, &domain, &devid);
-
if (!iommu || !domain) {
*dma_addr = (dma_addr_t)paddr;
return virt_addr;
}
+ if (!dma_mask)
+ dma_mask = *dev->dma_mask;
+
spin_lock_irqsave(&domain->lock, flags);
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
- size, DMA_BIDIRECTIONAL);
+ size, DMA_BIDIRECTIONAL, true, dma_mask);
if (*dma_addr == bad_dma_address) {
free_pages((unsigned long)virt_addr, get_order(size));
@@ -1021,10 +1219,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
goto out;
}
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, *dma_addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
@@ -1035,8 +1230,6 @@ out:
/*
* The exported free_coherent function for dma_ops.
- * FIXME: fix the generic x86 DMA layer so that it actually calls that
- * function.
*/
static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr)
@@ -1046,6 +1239,9 @@ static void free_coherent(struct device *dev, size_t size,
struct protection_domain *domain;
u16 devid;
+ if (!check_device(dev))
+ return;
+
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain)
@@ -1054,9 +1250,8 @@ static void free_coherent(struct device *dev, size_t size,
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- iommu_flush_pages(iommu, domain->id, dma_addr, size);
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1066,6 +1261,30 @@ free_mem:
}
/*
+ * This function is called by the DMA layer to find out if we can handle a
+ * particular device. It is part of the dma_ops.
+ */
+static int amd_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ u16 bdf;
+ struct pci_dev *pcidev;
+
+ /* No device or no PCI device */
+ if (!dev || dev->bus != &pci_bus_type)
+ return 0;
+
+ pcidev = to_pci_dev(dev);
+
+ bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
+
+ /* Out of our scope? */
+ if (bdf > amd_iommu_last_bdf)
+ return 0;
+
+ return 1;
+}
+
+/*
* The function for pre-allocating protection domains.
*
* If the driver core informs the DMA layer if a driver grabs a device
@@ -1094,10 +1313,9 @@ void prealloc_protection_domains(void)
if (!dma_dom)
continue;
init_unity_mappings_for_device(dma_dom, devid);
- set_device_domain(iommu, &dma_dom->domain, devid);
- printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
- dma_dom->domain.id);
- print_devid(devid, 1);
+ dma_dom->target_dev = devid;
+
+ list_add_tail(&dma_dom->list, &iommu_pd_list);
}
}
@@ -1108,6 +1326,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
.unmap_single = unmap_single,
.map_sg = map_sg,
.unmap_sg = unmap_sg,
+ .dma_supported = amd_iommu_dma_supported,
};
/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index d9a9da597e7..148fcfe22f1 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -22,6 +22,8 @@
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
#include <asm/pci-direct.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
@@ -30,7 +32,6 @@
/*
* definitions for the ACPI scanning code
*/
-#define PCI_BUS(x) (((x) >> 8) & 0xff)
#define IVRS_HEADER_LENGTH 48
#define ACPI_IVHD_TYPE 0x10
@@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
int amd_iommu_isolate; /* if 1, device isolation is enabled */
+bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
@@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
- ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
ctrl &= ~(1 << bit);
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
@@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
void __init iommu_enable(struct amd_iommu *iommu)
{
- printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
- print_devid(iommu->devid, 0);
- printk(" cap 0x%hx\n", iommu->cap_ptr);
+ printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
+ "at %02x:%02x.%x cap 0x%hx\n",
+ iommu->dev->bus->number,
+ PCI_SLOT(iommu->dev->devfn),
+ PCI_FUNC(iommu->dev->devfn),
+ iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
+/* Function to enable IOMMU event logging and event interrupts */
+void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+{
+ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
+ iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+}
+
/*
* mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
* the system has one.
@@ -286,6 +298,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
****************************************************************************/
/*
+ * This function calculates the length of a given IVHD entry
+ */
+static inline int ivhd_entry_length(u8 *ivhd)
+{
+ return 0x04 << (*ivhd >> 6);
+}
+
+/*
* This function reads the last device id the IOMMU has to handle from the PCI
* capability header for this IOMMU
*/
@@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
default:
break;
}
- p += 0x04 << (*p >> 6);
+ p += ivhd_entry_length(p);
}
WARN_ON(p != end);
@@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
static void __init free_command_buffer(struct amd_iommu *iommu)
{
- free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
+ free_pages((unsigned long)iommu->cmd_buf,
+ get_order(iommu->cmd_buf_size));
+}
+
+/* allocates the memory where the IOMMU will log its events to */
+static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
+{
+ u64 entry;
+ iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(EVT_BUFFER_SIZE));
+
+ if (iommu->evt_buf == NULL)
+ return NULL;
+
+ entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+ memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
+ &entry, sizeof(entry));
+
+ iommu->evt_buf_size = EVT_BUFFER_SIZE;
+
+ return iommu->evt_buf;
+}
+
+static void __init free_event_buffer(struct amd_iommu *iommu)
+{
+ free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
}
/* sets a specific bit in the device table entry. */
@@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
*/
static void __init init_iommu_from_pci(struct amd_iommu *iommu)
{
- int bus = PCI_BUS(iommu->devid);
- int dev = PCI_SLOT(iommu->devid);
- int fn = PCI_FUNC(iommu->devid);
int cap_ptr = iommu->cap_ptr;
- u32 range;
+ u32 range, misc;
- iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
+ pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
+ &iommu->cap);
+ pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
+ &range);
+ pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
+ &misc);
- range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
iommu->first_device = calc_devid(MMIO_GET_BUS(range),
MMIO_GET_FD(range));
iommu->last_device = calc_devid(MMIO_GET_BUS(range),
MMIO_GET_LD(range));
+ iommu->evt_msi_num = MMIO_MSI_NUM(misc);
}
/*
@@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
}
- p += 0x04 << (e->type >> 6);
+ p += ivhd_entry_length(p);
}
}
@@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu)
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
+ free_event_buffer(iommu);
iommu_unmap_mmio_space(iommu);
}
@@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
/*
* Copy data from ACPI table entry to the iommu struct
*/
- iommu->devid = h->devid;
+ iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
+ if (!iommu->dev)
+ return 1;
+
iommu->cap_ptr = h->cap_ptr;
+ iommu->pci_seg = h->pci_seg;
iommu->mmio_phys = h->mmio_phys;
iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
if (!iommu->mmio_base)
@@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->cmd_buf)
return -ENOMEM;
+ iommu->evt_buf = alloc_event_buffer(iommu);
+ if (!iommu->evt_buf)
+ return -ENOMEM;
+
+ iommu->int_enabled = false;
+
init_iommu_from_pci(iommu);
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
+ pci_enable_device(iommu->dev);
+
return 0;
}
@@ -706,6 +766,95 @@ static int __init init_iommu_all(struct acpi_table_header *table)
/****************************************************************************
*
+ * The following functions initialize the MSI interrupts for all IOMMUs
+ * in the system. Its a bit challenging because there could be multiple
+ * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
+ * pci_dev.
+ *
+ ****************************************************************************/
+
+static int __init iommu_setup_msix(struct amd_iommu *iommu)
+{
+ struct amd_iommu *curr;
+ struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
+ int nvec = 0, i;
+
+ list_for_each_entry(curr, &amd_iommu_list, list) {
+ if (curr->dev == iommu->dev) {
+ entries[nvec].entry = curr->evt_msi_num;
+ entries[nvec].vector = 0;
+ curr->int_enabled = true;
+ nvec++;
+ }
+ }
+
+ if (pci_enable_msix(iommu->dev, entries, nvec)) {
+ pci_disable_msix(iommu->dev);
+ return 1;
+ }
+
+ for (i = 0; i < nvec; ++i) {
+ int r = request_irq(entries->vector, amd_iommu_int_handler,
+ IRQF_SAMPLE_RANDOM,
+ "AMD IOMMU",
+ NULL);
+ if (r)
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ for (i -= 1; i >= 0; --i)
+ free_irq(entries->vector, NULL);
+
+ pci_disable_msix(iommu->dev);
+
+ return 1;
+}
+
+static int __init iommu_setup_msi(struct amd_iommu *iommu)
+{
+ int r;
+ struct amd_iommu *curr;
+
+ list_for_each_entry(curr, &amd_iommu_list, list) {
+ if (curr->dev == iommu->dev)
+ curr->int_enabled = true;
+ }
+
+
+ if (pci_enable_msi(iommu->dev))
+ return 1;
+
+ r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
+ IRQF_SAMPLE_RANDOM,
+ "AMD IOMMU",
+ NULL);
+
+ if (r) {
+ pci_disable_msi(iommu->dev);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int __init iommu_init_msi(struct amd_iommu *iommu)
+{
+ if (iommu->int_enabled)
+ return 0;
+
+ if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
+ return iommu_setup_msix(iommu);
+ else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+ return iommu_setup_msi(iommu);
+
+ return 1;
+}
+
+/****************************************************************************
+ *
* The next functions belong to the third pass of parsing the ACPI
* table. In this last pass the memory mapping requirements are
* gathered (like exclusion and unity mapping reanges).
@@ -801,6 +950,20 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
}
/*
+ * Init the device table to not allow DMA access for devices and
+ * suppress all page faults
+ */
+static void init_device_table(void)
+{
+ u16 devid;
+
+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+ set_dev_entry_bit(devid, DEV_ENTRY_VALID);
+ set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+ }
+}
+
+/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
*/
@@ -810,6 +973,8 @@ static void __init enable_iommus(void)
list_for_each_entry(iommu, &amd_iommu_list, list) {
iommu_set_exclusion_range(iommu);
+ iommu_init_msi(iommu);
+ iommu_enable_event_logging(iommu);
iommu_enable(iommu);
}
}
@@ -931,6 +1096,9 @@ int __init amd_iommu_init(void)
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
+ /* init the device table */
+ init_device_table();
+
/*
* let all alias entries point to itself
*/
@@ -954,15 +1122,15 @@ int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
- ret = amd_iommu_init_dma_ops();
+ ret = sysdev_class_register(&amd_iommu_sysdev_class);
if (ret)
goto free;
- ret = sysdev_class_register(&amd_iommu_sysdev_class);
+ ret = sysdev_register(&device_amd_iommu);
if (ret)
goto free;
- ret = sysdev_register(&device_amd_iommu);
+ ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
@@ -977,11 +1145,17 @@ int __init amd_iommu_init(void)
else
printk("disabled\n");
+ if (amd_iommu_unmap_flush)
+ printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
+ else
+ printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
+
out:
return ret;
free:
- free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
+ free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
+ get_order(MAX_DOMAIN_ID/8));
free_pages((unsigned long)amd_iommu_pd_table,
get_order(rlookup_table_size));
@@ -1039,8 +1213,10 @@ void __init amd_iommu_detect(void)
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
- if (strcmp(str, "isolate") == 0)
+ if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = 1;
+ if (strncmp(str, "fullflush", 11) == 0)
+ amd_iommu_unmap_flush = true;
}
return 1;
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 44e21826db1..9a32b37ee2e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -455,11 +455,11 @@ out:
force_iommu ||
valid_agp ||
fallback_aper_force) {
- printk(KERN_ERR
+ printk(KERN_INFO
"Your BIOS doesn't leave a aperture memory hole\n");
- printk(KERN_ERR
+ printk(KERN_INFO
"Please enable the IOMMU option in the BIOS setup\n");
- printk(KERN_ERR
+ printk(KERN_INFO
"This costs you %d MB of RAM\n",
32 << fallback_aper_order);
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index b8d80c29165..a91c57cb666 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -114,8 +114,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
static int enabled_via_apicbase;
static unsigned long apic_phys;
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
-
/*
* Get the LAPIC version
@@ -1507,12 +1505,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 37e037606f3..53898b65a6a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -98,7 +98,6 @@ static struct clock_event_device lapic_clockevent = {
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
static unsigned long apic_phys;
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
unsigned long mp_lapic_addr;
@@ -1444,12 +1443,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b..5145a6e72bb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,12 +228,12 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
-#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/i8253.h>
+#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
@@ -2217,7 +2217,7 @@ static int __init apm_init(void)
dmi_check_system(apm_dmi_table);
- if (apm_info.bios.version == 0 || paravirt_enabled()) {
+ if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
printk(KERN_INFO "apm: BIOS not found.\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index c639bd55391..fdd585f9c53 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
{
const char *str;
switch (status) {
- case 0: str = "Call completed without error"; break;
- case -1: str = "Not implemented"; break;
- case -2: str = "Invalid argument"; break;
- case -3: str = "Call completed with error"; break;
- default: str = "Unknown BIOS status code"; break;
+ case 0: str = "Call completed without error"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ default: str = "Unknown BIOS status code"; break;
}
return str;
}
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 84a8220a607..a6ef672adbb 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ /*
+ * There is a known erratum on Pentium III and Core Solo
+ * and Core Duo CPUs.
+ * " Page with PAT set to WC while associated MTRR is UC
+ * may consolidate to UC "
+ * Because of this erratum, it is better to stick with
+ * setting WC in MTRR rather than using PAT on these CPUs.
+ *
+ * Enable PAT WC only on P4, Core 2 or later CPUs.
+ */
+ if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
return;
- break;
+
+ pat_disable("PAT WC disabled due to known CPU erratum.");
+ return;
+
case X86_VENDOR_AMD:
case X86_VENDOR_CENTAUR:
case X86_VENDOR_TRANSMETA:
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cae9cabc303..18514ed2610 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1<<8))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
+
+ /* Set MTRR capability flag if appropriate */
+ if (c->x86_model == 13 || c->x86_model == 9 ||
+ (c->x86_model == 8 && c->x86_mask >= 8))
+ set_cpu_cap(c, X86_FEATURE_K6_MTRR);
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
mbytes);
}
- /* Set MTRR capability flag if appropriate */
- if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
- set_cpu_cap(c, X86_FEATURE_K6_MTRR);
break;
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e0f45edd6a5..a0534c04d38 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -314,6 +314,16 @@ enum {
EAMD3D = 1<<20,
};
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 5:
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+ break;
+ }
+}
+
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
+ .c_early_init = early_init_centaur,
.c_init = init_centaur,
.c_size_cache = centaur_size_cache,
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa3..4e456bd955b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void)
get_cpu_vendor(c, 1);
+ early_get_cap(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
+}
- early_get_cap(c);
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6; unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. In the latter case it doesn't even *fail*
+ * reliably, so probing for it doesn't even work. Disable it completely
+ * unless we can find a reliable way to detect all the broken cases.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
}
init_scattered_cpuid_features(c);
+ detect_nopl(c);
}
-
}
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index cc6efe86249..43f1aa51da5 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -18,6 +18,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#include <asm/numa.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
@@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void)
}
}
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. Hence, probe for it based on first
+ * principles.
+ *
+ * Note: no 64-bit chip is known to lack these, but put the code here
+ * for consistency with 32 bits, and to make it utterly trivial to
+ * diagnose the problem should it ever surface.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ const u32 nopl_signature = 0x888c53b1; /* Random number */
+ u32 has_nopl = nopl_signature;
+
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
+ if (c->x86 >= 6) {
+ asm volatile("\n"
+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: xor %0,%0\n"
+ " jmp 2b\n"
+ " .previous\n"
+ _ASM_EXTABLE(1b,3b)
+ : "+a" (has_nopl));
+
+ if (has_nopl == nopl_signature)
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+ }
+}
+
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
void __init early_cpu_init(void)
@@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_phys_bits = eax & 0xff;
}
+ detect_nopl(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
@@ -394,6 +430,49 @@ static __init int setup_noclflush(char *arg)
}
__setup("noclflush", setup_noclflush);
+struct msr_range {
+ unsigned min;
+ unsigned max;
+};
+
+static struct msr_range msr_range_array[] __cpuinitdata = {
+ { 0x00000000, 0x00000418},
+ { 0xc0000000, 0xc000040b},
+ { 0xc0010000, 0xc0010142},
+ { 0xc0011000, 0xc001103b},
+};
+
+static void __cpuinit print_cpu_msr(void)
+{
+ unsigned index;
+ u64 val;
+ int i;
+ unsigned index_min, index_max;
+
+ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+ index_min = msr_range_array[i].min;
+ index_max = msr_range_array[i].max;
+ for (index = index_min; index < index_max; index++) {
+ if (rdmsrl_amd_safe(index, &val))
+ continue;
+ printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+ }
+ }
+}
+
+static int show_msr __cpuinitdata;
+static __init int setup_show_msr(char *arg)
+{
+ int num;
+
+ get_option(&arg, &num);
+
+ if (num > 0)
+ show_msr = num;
+ return 1;
+}
+__setup("show_msr=", setup_show_msr);
+
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
if (c->x86_model_id[0])
@@ -403,6 +482,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
printk(KERN_CONT "\n");
+
+#ifdef CONFIG_SMP
+ if (c->cpu_index < show_msr)
+ print_cpu_msr();
+#else
+ if (show_msr)
+ print_cpu_msr();
+#endif
}
static __init int setup_disablecpuid(char *arg)
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index f1685fb91fb..b8e05ee4f73 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
}
if (c->x86 != 0xF) {
- printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n");
+ printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
return 0;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 4e7271999a7..84bb395038d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -737,63 +737,44 @@ static int find_psb_table(struct powernow_k8_data *data)
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
{
- if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
-}
-
-
-static struct acpi_processor_performance *acpi_perf_data;
-static int preregister_valid;
-
-static int powernow_k8_cpu_preinit_acpi(void)
-{
- acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
- if (!acpi_perf_data)
- return -ENODEV;
-
- if (acpi_processor_preregister_performance(acpi_perf_data))
- return -ENODEV;
- else
- preregister_valid = 1;
- return 0;
+ data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
+ data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+ data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
+ data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
}
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val;
- int cpu = 0;
- data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
- if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
+ if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
return -EIO;
}
/* verify the data contained in the ACPI structures */
- if (data->acpi_data->state_count <= 1) {
+ if (data->acpi_data.state_count <= 1) {
dprintk("No ACPI P-States\n");
goto err_out;
}
- if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data->control_register.space_id,
- data->acpi_data->status_register.space_id);
+ data->acpi_data.control_register.space_id,
+ data->acpi_data.status_register.space_id);
goto err_out;
}
/* fill in data->powernow_table */
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data->state_count + 1)), GFP_KERNEL);
+ * (data->acpi_data.state_count + 1)), GFP_KERNEL);
if (!powernow_table) {
dprintk("powernow_table memory alloc failure\n");
goto err_out;
@@ -806,12 +787,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data->state_count].index = 0;
+ powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data.state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
- data->numps = data->acpi_data->state_count;
+ data->numps = data->acpi_data.state_count;
if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -819,31 +800,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
- /* determine affinity, from ACPI if available */
- if (preregister_valid) {
- if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
- (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
- data->starting_core_affinity = data->acpi_data->shared_cpu_map;
- else
- data->starting_core_affinity = cpumask_of_cpu(data->cpu);
- } else {
- /* best guess from family if not */
- if (cpu_family == CPU_HW_PSTATE)
- data->starting_core_affinity = cpumask_of_cpu(data->cpu);
- else
- data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
- }
-
return 0;
err_out_mem:
kfree(powernow_table);
err_out:
- acpi_processor_unregister_performance(data->acpi_data, data->cpu);
+ acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data->state_count = 0;
+ data->acpi_data.state_count = 0;
return -ENODEV;
}
@@ -855,10 +821,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
- for (i = 0; i < data->acpi_data->state_count; i++) {
+ for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
- index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
+ index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -874,7 +840,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
powernow_table[i].index = index;
- powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
+ powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
}
return 0;
}
@@ -883,16 +849,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
{
int i;
int cntlofreq = 0;
- for (i = 0; i < data->acpi_data->state_count; i++) {
+ for (i = 0; i < data->acpi_data.state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data->states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+ fid = data->acpi_data.states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
} else {
- fid = data->acpi_data->states[i].control & FID_MASK;
- vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
+ fid = data->acpi_data.states[i].control & FID_MASK;
+ vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -933,10 +899,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
cntlofreq = i;
}
- if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
powernow_table[i].frequency,
- (unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
+ (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -946,12 +912,11 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
- if (data->acpi_data->state_count)
- acpi_processor_unregister_performance(data->acpi_data, data->cpu);
+ if (data->acpi_data.state_count)
+ acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
}
#else
-static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -1136,7 +1101,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
- cpumask_t oldmask = CPU_MASK_ALL;
+ cpumask_t oldmask;
int rc;
if (!cpu_online(pol->cpu))
@@ -1209,7 +1174,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed_ptr(current, &oldmask);
- pol->cpus = data->starting_core_affinity;
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cpus = cpumask_of_cpu(pol->cpu);
+ else
+ pol->cpus = per_cpu(cpu_core_map, pol->cpu);
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1332,7 +1300,6 @@ static int __cpuinit powernowk8_init(void)
}
if (supported_cpus == num_online_cpus()) {
- powernow_k8_cpu_preinit_acpi();
printk(KERN_INFO PFX "Found %d %s "
"processors (%d cpu cores) (" VERSION ")\n",
num_online_nodes(),
@@ -1349,10 +1316,6 @@ static void __exit powernowk8_exit(void)
dprintk("exit\n");
cpufreq_unregister_driver(&cpufreq_amd64_driver);
-
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
- free_percpu(acpi_perf_data);
-#endif
}
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index a62612cd4be..ab48cfed4d9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,13 +33,12 @@ struct powernow_k8_data {
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance *acpi_data;
+ struct acpi_processor_performance acpi_data;
#endif
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
cpumask_t *available_cores;
- cpumask_t starting_core_affinity;
};
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 15e13c01cc3..3b5f06423e7 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -26,7 +26,7 @@
#include <asm/cpufeature.h>
#define PFX "speedstep-centrino: "
-#define MAINTAINER "cpufreq@lists.linux.org.uk"
+#define MAINTAINER "cpufreq@vger.kernel.org"
#define dprintk(msg...) \
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 3fd7a67bb06..898a5a2002e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,13 +15,11 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
- unsigned long flags;
/* we test for DEVID by checking whether CCR3 is writable */
- local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, ccr3 ^ 0x80);
getCx86(0xc0); /* dummy to change bus */
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
*dir0 = getCx86(CX86_DIR0);
*dir1 = getCx86(CX86_DIR1);
}
- local_irq_restore(flags);
}
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __do_cyrix_devid(dir0, dir1);
+ local_irq_restore(flags);
+}
/*
* Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
* order to identify the Cyrix CPU model after we're out of setup.c
@@ -134,23 +139,6 @@ static void __cpuinit set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
}
-static void __cpuinit set_cx86_inc(void)
-{
- unsigned char ccr3;
-
- printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- /* PCR1 -- Performance Control */
- /* Incrementor on, whatever that is */
- setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
- /* PCR0 -- Performance Control */
- /* Incrementor Margin 10 */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-}
-
/*
* Configure later MediaGX and/or Geode processor.
*/
@@ -174,11 +162,28 @@ static void __cpuinit geode_configure(void)
set_cx86_memwb();
set_cx86_reorder();
- set_cx86_inc();
local_irq_restore(flags);
}
+static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
+{
+ unsigned char dir0, dir0_msn, dir1 = 0;
+
+ __do_cyrix_devid(&dir0, &dir1);
+ dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+
+ switch (dir0_msn) {
+ case 3: /* 6x86/6x86L */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ case 5: /* 6x86MX/M II */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ }
+}
static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
@@ -434,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
+ .c_early_init = early_init_cyrix,
.c_init = init_cyrix,
.c_identify = cyrix_identify,
};
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index 0bf4d37a048..b96b69545fb 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
NULL, NULL, NULL, NULL,
"constant_tsc", "up", NULL, "arch_perfmon",
"pebs", "bts", NULL, NULL,
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "rep_good", NULL, NULL, NULL,
+ "nopl", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f2569b8f..f113ef4595f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS);
+ ds_init_intel(c);
}
if (cpu_has_bts)
- ds_init_intel(c);
+ ptrace_bts_init_intel(c);
/*
* See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 65a339678ec..726a5fcdf34 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = {
};
DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
@@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
mce_create_device(cpu);
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
mce_remove_device(cpu);
break;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 88736cadbaa..5eb390a4b2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
deallocate_threshold_block(cpu, bank);
free_out:
+ kobject_del(b->kobj);
kobject_put(b->kobj);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu)
}
/* get notified when a cpu comes on/off */
-static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+ unsigned int cpu)
{
- /* cpu was unsigned int to begin with */
- unsigned int cpu = (unsigned long)hcpu;
-
if (cpu >= NR_CPUS)
- goto out;
+ return;
switch (action) {
case CPU_ONLINE:
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
default:
break;
}
- out:
- return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
- .notifier_call = threshold_cpu_callback,
-};
-
static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void)
if (err)
return err;
}
- register_hotcpu_notifier(&threshold_cpu_notifier);
+ threshold_cpu_callback = amd_64_threshold_cpu_callback;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 509bd3d9eac..4e8d77f01ee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
unsigned int mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int tmp, hi;
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
@@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask. */
- mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
- | mask_lo >> PAGE_SHIFT;
+ tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask_lo = size_or_mask | tmp;
+ /* Expand tmp with high bits to all 1s*/
+ hi = fls(tmp);
+ if (hi > 0) {
+ tmp |= ~((1<<(hi - 1)) - 1);
+
+ if (tmp != mask_lo) {
+ WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+ mask_lo = tmp;
+ }
+ }
/* This works correctly if size is a power of two, i.e. a
contiguous range. */
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 84c480bb371..4c4214690dd 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
}
/* RED-PEN: base can be > 32bit */
len += seq_printf(seq,
- "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+ "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_attrib_to_str(type), mtrr_usage_table[i]);
+ mtrr_usage_table[i], mtrr_attrib_to_str(type));
}
}
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b5ade28ca8f..c78c04821ea 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
/* take out UC ranges */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
- if (type != MTRR_TYPE_UNCACHABLE)
+ if (type != MTRR_TYPE_UNCACHABLE &&
+ type != MTRR_TYPE_WRPROT)
continue;
size = range_state[i].size_pfn;
if (!size)
@@ -834,7 +835,14 @@ static int __init enable_mtrr_cleanup_setup(char *str)
enable_mtrr_cleanup = 1;
return 0;
}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+static int __init mtrr_cleanup_debug_setup(char *str)
+{
+ debug_print = 1;
+ return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
struct var_mtrr_state {
unsigned long range_startk;
@@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits)
}
}
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+ char factor;
+ unsigned long base = sizek;
+
+ if (base & ((1<<10) - 1)) {
+ /* not MB alignment */
+ factor = 'K';
+ } else if (base & ((1<<20) - 1)){
+ factor = 'M';
+ base >>= 10;
+ } else {
+ factor = 'G';
+ base >>= 20;
+ }
+
+ *factorp = factor;
+
+ return base;
+}
+
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
unsigned long range_sizek, unsigned char type)
@@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
align = max_align;
sizek = 1 << align;
- if (debug_print)
+ if (debug_print) {
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+
+ start_base = to_size_factor(range_startk, &start_factor),
+ size_base = to_size_factor(sizek, &size_factor),
+
printk(KERN_DEBUG "Setting variable MTRR %d, "
- "base: %ldMB, range: %ldMB, type %s\n",
- reg, range_startk >> 10, sizek >> 10,
+ "base: %ld%cB, range: %ld%cB, type %s\n",
+ reg, start_base, start_factor,
+ size_base, size_factor,
(type == MTRR_TYPE_UNCACHABLE)?"UC":
((type == MTRR_TYPE_WRBACK)?"WB":"Other")
);
+ }
save_var_mtrr(reg++, range_startk, sizek, type);
range_startk += sizek;
range_sizek -= sizek;
@@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
/* try to append some small hole */
range0_basek = state->range_startk;
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+ /* no increase */
if (range0_sizek == state->range_sizek) {
if (debug_print)
printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
return 0;
}
- range0_sizek -= chunk_sizek;
- if (range0_sizek && sizek) {
- while (range0_basek + range0_sizek > (basek + sizek)) {
- range0_sizek -= chunk_sizek;
- if (!range0_sizek)
- break;
- }
+ /* only cut back, when it is not the last */
+ if (sizek) {
+ while (range0_basek + range0_sizek > (basek + sizek)) {
+ if (range0_sizek >= chunk_sizek)
+ range0_sizek -= chunk_sizek;
+ else
+ range0_sizek = 0;
+
+ if (!range0_sizek)
+ break;
+ }
+ }
+
+second_try:
+ range_basek = range0_basek + range0_sizek;
+
+ /* one hole in the middle */
+ if (range_basek > basek && range_basek <= (basek + sizek))
+ second_sizek = range_basek - basek;
+
+ if (range0_sizek > state->range_sizek) {
+
+ /* one hole in middle or at end */
+ hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+ /* hole size should be less than half of range0 size */
+ if (hole_sizek >= (range0_sizek >> 1) &&
+ range0_sizek >= chunk_sizek) {
+ range0_sizek -= chunk_sizek;
+ second_sizek = 0;
+ hole_sizek = 0;
+
+ goto second_try;
+ }
}
if (range0_sizek) {
@@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
(range0_basek + range0_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
range0_sizek, MTRR_TYPE_WRBACK);
-
- }
-
- range_basek = range0_basek + range0_sizek;
- range_sizek = chunk_sizek;
-
- if (range_basek + range_sizek > basek &&
- range_basek + range_sizek <= (basek + sizek)) {
- /* one hole */
- second_basek = basek;
- second_sizek = range_basek + range_sizek - basek;
}
- /* if last piece, only could one hole near end */
- if ((second_basek || !basek) &&
- range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
- (chunk_sizek >> 1)) {
- /*
- * one hole in middle (second_sizek is 0) or at end
- * (second_sizek is 0 )
- */
- hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
- - second_sizek;
- hole_basek = range_basek + range_sizek - hole_sizek
- - second_sizek;
- } else {
- /* fallback for big hole, or several holes */
+ if (range0_sizek < state->range_sizek) {
+ /* need to handle left over */
range_sizek = state->range_sizek - range0_sizek;
- second_basek = 0;
- second_sizek = 0;
+
+ if (debug_print)
+ printk(KERN_DEBUG "range: %016lx - %016lx\n",
+ range_basek<<10,
+ (range_basek + range_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, range_basek,
+ range_sizek, MTRR_TYPE_WRBACK);
}
- if (debug_print)
- printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
- (range_basek + range_sizek)<<10);
- state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
- MTRR_TYPE_WRBACK);
if (hole_sizek) {
+ hole_basek = range_basek - hole_sizek - second_sizek;
if (debug_print)
printk(KERN_DEBUG "hole: %016lx - %016lx\n",
- hole_basek<<10, (hole_basek + hole_sizek)<<10);
- state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
- MTRR_TYPE_UNCACHABLE);
-
+ hole_basek<<10,
+ (hole_basek + hole_sizek)<<10);
+ state->reg = range_to_mtrr(state->reg, hole_basek,
+ hole_sizek, MTRR_TYPE_UNCACHABLE);
}
return second_sizek;
@@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result {
};
/*
- * gran_size: 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 4G
- * so we need (2+13)*6
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
*/
-#define NUM_RESULT 90
+#define NUM_RESULT 136
#define PSHIFT (PAGE_SHIFT - 10)
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
static int __init mtrr_cleanup(unsigned address_bits)
{
unsigned long extra_remove_base, extra_remove_size;
- unsigned long i, base, size, def, dummy;
+ unsigned long base, size, def, dummy;
mtrr_type type;
int nr_range, nr_range_new;
u64 chunk_size, gran_size;
unsigned long range_sums, range_sums_new;
int index_good;
int num_reg_good;
+ int i;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
@@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
continue;
if (!size)
type = MTRR_NUM_TYPES;
+ if (type == MTRR_TYPE_WRPROT)
+ type = MTRR_TYPE_UNCACHABLE;
num[type]++;
}
@@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits)
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
+ /* print original var MTRRs at first, for debugging: */
+ printk(KERN_DEBUG "original variable MTRRs\n");
+ for (i = 0; i < num_var_ranges; i++) {
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+
+ size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+ if (!size_base)
+ continue;
+
+ size_base = to_size_factor(size_base, &size_factor),
+ start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+ start_base = to_size_factor(start_base, &start_factor),
+ type = range_state[i].type;
+
+ printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ i, start_base, start_factor,
+ size_base, size_factor,
+ (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+ ((type == MTRR_TYPE_WRPROT) ? "WP" :
+ ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+ );
+ }
+
memset(range, 0, sizeof(range));
extra_remove_size = 0;
- if (mtrr_tom2) {
- extra_remove_base = 1 << (32 - PAGE_SHIFT);
+ extra_remove_base = 1 << (32 - PAGE_SHIFT);
+ if (mtrr_tom2)
extra_remove_size =
(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
- }
nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
extra_remove_size);
+ /*
+ * [0, 1M) should always be coverred by var mtrr with WB
+ * and fixed mtrrs should take effective before var mtrr for it
+ */
+ nr_range = add_range_with_merge(range, nr_range, 0,
+ (1ULL<<(20 - PAGE_SHIFT)) - 1);
+ /* sort the ranges */
+ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM coverred: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
int num_reg;
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
- debug_print = 1;
+ debug_print++;
/* convert ranges to var ranges state */
num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
mtrr_gran_size);
@@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits)
result[i].lose_cover_sizek =
(range_sums - range_sums_new) << PSHIFT;
- printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
- result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
- result[i].chunk_sizek >> 10);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad?"*BAD*":" ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
result[i].num_reg, result[i].bad?"-":"",
- result[i].lose_cover_sizek >> 10);
+ lose_base, lose_factor);
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
"will find optimal one\n");
- debug_print = 0;
+ debug_print--;
memset(result, 0, sizeof(result[0]));
}
i = 0;
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
memset(result, 0, sizeof(result));
- for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
- for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+ for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+ char gran_factor;
+ unsigned long gran_base;
+
+ if (debug_print)
+ gran_base = to_size_factor(gran_size >> 10, &gran_factor);
+
+ for (chunk_size = gran_size; chunk_size < (1ULL<<32);
chunk_size <<= 1) {
int num_reg;
- if (debug_print)
- printk(KERN_INFO
- "\ngran_size: %lldM chunk_size_size: %lldM\n",
- gran_size >> 20, chunk_size >> 20);
+ if (debug_print) {
+ char chunk_factor;
+ unsigned long chunk_base;
+
+ chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
+ printk(KERN_INFO "\n");
+ printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ }
if (i >= NUM_RESULT)
continue;
@@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits)
/* print out all */
for (i = 0; i < NUM_RESULT; i++) {
- printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
- result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
- result[i].chunk_sizek >> 10);
- printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
- result[i].num_reg, result[i].bad?"-":"",
- result[i].lose_cover_sizek >> 10);
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad?"*BAD*":" ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad?"-":"",
+ lose_base, lose_factor);
}
/* try to find the optimal index */
@@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
nr_mtrr_spare_reg = num_var_ranges - 1;
num_reg_good = -1;
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
- if (!min_loss_pfn[i]) {
+ if (!min_loss_pfn[i])
num_reg_good = i;
- break;
- }
}
index_good = -1;
@@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits)
}
if (index_good != -1) {
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
i = index_good;
- printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
- result[i].gran_sizek >> 10,
- result[i].chunk_sizek >> 10);
- printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
- result[i].num_reg,
- result[i].lose_cover_sizek >> 10);
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
+ result[i].num_reg, lose_base, lose_factor);
/* convert ranges to var ranges state */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
gran_size <<= 10;
- debug_print = 1;
+ debug_print++;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+ debug_print--;
set_var_mtrr_all(address_bits);
return 1;
}
@@ -1496,11 +1600,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- if (!kvm_para_available()) {
- printk(KERN_WARNING
+ WARN(!kvm_para_available(), KERN_WARNING
"WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
- }
return 0;
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index de7439f82b9..6bff382094f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
+ /* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= K7_EVNTSEL_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
return 1;
}
@@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
+ /* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= P6_EVNTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
return 1;
}
@@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = {
#define P4_CCCR_ENABLE (1 << 12)
#define P4_CCCR_OVF (1 << 31)
+#define P4_CONTROLS 18
+static unsigned int p4_controls[18] = {
+ MSR_P4_BPU_CCCR0,
+ MSR_P4_BPU_CCCR1,
+ MSR_P4_BPU_CCCR2,
+ MSR_P4_BPU_CCCR3,
+ MSR_P4_MS_CCCR0,
+ MSR_P4_MS_CCCR1,
+ MSR_P4_MS_CCCR2,
+ MSR_P4_MS_CCCR3,
+ MSR_P4_FLAME_CCCR0,
+ MSR_P4_FLAME_CCCR1,
+ MSR_P4_FLAME_CCCR2,
+ MSR_P4_FLAME_CCCR3,
+ MSR_P4_IQ_CCCR0,
+ MSR_P4_IQ_CCCR1,
+ MSR_P4_IQ_CCCR2,
+ MSR_P4_IQ_CCCR3,
+ MSR_P4_IQ_CCCR4,
+ MSR_P4_IQ_CCCR5,
+};
/*
* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
* CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -473,12 +506,38 @@ static int setup_p4_watchdog(unsigned nmi_hz)
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR0;
cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+ /*
+ * If we're on the kdump kernel or other situation, we may
+ * still have other performance counter registers set to
+ * interrupt and they'll keep interrupting forever because
+ * of the P4_CCCR_OVF quirk. So we need to ACK all the
+ * pending interrupts and disable all the registers here,
+ * before reenabling the NMI delivery. Refer to p4_rearm()
+ * about the P4_CCCR_OVF quirk.
+ */
+ if (reset_devices) {
+ unsigned int low, high;
+ int i;
+
+ for (i = 0; i < P4_CONTROLS; i++) {
+ rdmsr(p4_controls[i], low, high);
+ low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+ wrmsr(p4_controls[i], low, high);
+ }
+ }
} else {
/* logical cpu 1 */
perfctr_msr = MSR_P4_IQ_PERFCTR1;
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+ /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+ if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+ cccr_val = P4_CCCR_OVF_PMI0;
+ else
+ cccr_val = P4_CCCR_OVF_PMI1;
+ cccr_val |= P4_CCCR_ESCR_SELECT(4);
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -493,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
wrmsr(cccr_msr, cccr_val, 0);
write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
+
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = cccr_msr;
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ cccr_val |= P4_CCCR_ENABLE;
+ wrmsr(cccr_msr, cccr_val, 0);
return 1;
}
@@ -614,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = 0; /* unused */
+
+ /* ok, everything is initialized, announce that we're set */
+ cpu_nmi_set_wd_enabled();
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
return 1;
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 14b11b3be31..6a44d646599 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -36,7 +36,6 @@
#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
-#include <linux/smp_lock.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
@@ -89,6 +88,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
struct cpuid_regs cmd;
int cpu = iminor(file->f_path.dentry->d_inode);
u64 pos = *ppos;
+ ssize_t bytes = 0;
+ int err = 0;
if (count % 16)
return -EINVAL; /* Invalid chunk size */
@@ -96,14 +97,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
for (; count; count -= 16) {
cmd.eax = pos;
cmd.ecx = pos >> 32;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
- if (copy_to_user(tmp, &cmd, 16))
- return -EFAULT;
+ err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+ if (err)
+ break;
+ if (copy_to_user(tmp, &cmd, 16)) {
+ err = -EFAULT;
+ break;
+ }
tmp += 16;
+ bytes += 16;
*ppos = ++pos;
}
- return tmp - buf;
+ return bytes ? bytes : err;
}
static int cpuid_open(struct inode *inode, struct file *file)
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 15e6c6bc4a4..e90a60ef10c 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -7,9 +7,8 @@
#include <linux/errno.h>
#include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
/**
* copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
*/
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
@@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
return 0;
vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!vaddr)
+ return -ENOMEM;
if (userbuf) {
- if (copy_to_user(buf, (vaddr + offset), csize)) {
+ if (copy_to_user(buf, vaddr + offset, csize)) {
iounmap(vaddr);
return -EFAULT;
}
} else
- memcpy(buf, (vaddr + offset), csize);
+ memcpy(buf, vaddr + offset, csize);
iounmap(vaddr);
return csize;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48..2b69994fd3a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
* Debug Store support
*
* This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
* precise-event based sampling (PEBS).
*
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
*
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
*
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
*
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
*/
+
+#ifdef CONFIG_X86_DS
+
#include <asm/ds.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+
+/*
+ * The configuration for a particular DS hardware implementation.
+ */
+struct ds_configuration {
+ /* the size of the DS structure in bytes */
+ unsigned char sizeof_ds;
+ /* the size of one pointer-typed field in the DS structure in bytes;
+ this covers the first 8 fields related to buffer management. */
+ unsigned char sizeof_field;
+ /* the size of a BTS/PEBS record in bytes */
+ unsigned char sizeof_rec[2];
+};
+static struct ds_configuration ds_cfg;
/*
@@ -44,378 +67,747 @@
* (interrupt occurs when write pointer passes interrupt pointer)
* - value to which counter is reset following counter overflow
*
- * On later architectures, the last branch recording hardware uses
- * 64bit pointers even in 32bit mode.
- *
- *
- * Branch Trace Store (BTS) records store information about control
- * flow changes. They at least provide the following information:
- * - source linear address
- * - destination linear address
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
*
- * Netburst supported a predicated bit that had been dropped in later
- * architectures. We do not suppor it.
*
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ * - an offset giving the start of the respective region
*
- * In order to abstract from the actual DS and BTS layout, we describe
- * the access to the relevant fields.
- * Thanks to Andi Kleen for proposing this design.
+ * This offset is further used to index various arrays holding
+ * information for BTS and PEBS at the respective index.
*
- * The implementation, however, is not as general as it might seem. In
- * order to stay somewhat simple and efficient, we assume an
- * underlying unsigned type (mostly a pointer type) and we expect the
- * field to be at least as big as that type.
+ * On later 32bit processors, we only access the lower 32bit of the
+ * 64bit pointer fields. The upper halves will be zeroed out.
*/
-/*
- * A special from_ip address to indicate that the BTS record is an
- * info record that needs to be interpreted or skipped.
- */
-#define BTS_ESCAPE_ADDRESS (-1)
+enum ds_field {
+ ds_buffer_base = 0,
+ ds_index,
+ ds_absolute_maximum,
+ ds_interrupt_threshold,
+};
-/*
- * A field access descriptor
- */
-struct access_desc {
- unsigned char offset;
- unsigned char size;
+enum ds_qualifier {
+ ds_bts = 0,
+ ds_pebs
};
+static inline unsigned long ds_get(const unsigned char *base,
+ enum ds_qualifier qual, enum ds_field field)
+{
+ base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+ return *(unsigned long *)base;
+}
+
+static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
+ enum ds_field field, unsigned long value)
+{
+ base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+ (*(unsigned long *)base) = value;
+}
+
+
/*
- * The configuration for a particular DS/BTS hardware implementation.
+ * Locking is done only for allocating BTS or PEBS resources and for
+ * guarding context and buffer memory allocation.
+ *
+ * Most functions require the current task to own the ds context part
+ * they are going to access. All the locking is done when validating
+ * access to the context.
*/
-struct ds_configuration {
- /* the DS configuration */
- unsigned char sizeof_ds;
- struct access_desc bts_buffer_base;
- struct access_desc bts_index;
- struct access_desc bts_absolute_maximum;
- struct access_desc bts_interrupt_threshold;
- /* the BTS configuration */
- unsigned char sizeof_bts;
- struct access_desc from_ip;
- struct access_desc to_ip;
- /* BTS variants used to store additional information like
- timestamps */
- struct access_desc info_type;
- struct access_desc info_data;
- unsigned long debugctl_mask;
-};
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
- * The global configuration used by the below accessor functions
+ * Validate that the current task is allowed to access the BTS/PEBS
+ * buffer of the parameter task.
+ *
+ * Returns 0, if access is granted; -Eerrno, otherwise.
*/
-static struct ds_configuration ds_cfg;
+static inline int ds_validate_access(struct ds_context *context,
+ enum ds_qualifier qual)
+{
+ if (!context)
+ return -EPERM;
+
+ if (context->owner[qual] == current)
+ return 0;
+
+ return -EPERM;
+}
+
/*
- * Accessor functions for some DS and BTS fields using the above
- * global ptrace_bts_cfg.
+ * We either support (system-wide) per-cpu or per-thread allocation.
+ * We distinguish the two based on the task_struct pointer, where a
+ * NULL pointer indicates per-cpu allocation for the current cpu.
+ *
+ * Allocations are use-counted. As soon as resources are allocated,
+ * further allocations must be of the same type (per-cpu or
+ * per-thread). We model this by counting allocations (i.e. the number
+ * of tracers of a certain type) for one type negatively:
+ * =0 no tracers
+ * >0 number of per-thread tracers
+ * <0 number of per-cpu tracers
+ *
+ * The below functions to get and put tracers and to check the
+ * allocation type require the ds_lock to be held by the caller.
+ *
+ * Tracers essentially gives the number of ds contexts for a certain
+ * type of allocation.
*/
-static inline unsigned long get_bts_buffer_base(char *base)
+static long tracers;
+
+static inline void get_tracer(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
+ tracers += (task ? 1 : -1);
}
-static inline void set_bts_buffer_base(char *base, unsigned long value)
+
+static inline void put_tracer(struct task_struct *task)
{
- (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
+ tracers -= (task ? 1 : -1);
}
-static inline unsigned long get_bts_index(char *base)
+
+static inline int check_tracer(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_index.offset);
+ return (task ? (tracers >= 0) : (tracers <= 0));
}
-static inline void set_bts_index(char *base, unsigned long value)
+
+
+/*
+ * The DS context is either attached to a thread or to a cpu:
+ * - in the former case, the thread_struct contains a pointer to the
+ * attached context.
+ * - in the latter case, we use a static array of per-cpu context
+ * pointers.
+ *
+ * Contexts are use-counted. They are allocated on first access and
+ * deallocated when the last user puts the context.
+ *
+ * We distinguish between an allocating and a non-allocating get of a
+ * context:
+ * - the allocating get is used for requesting BTS/PEBS resources. It
+ * requires the caller to hold the global ds_lock.
+ * - the non-allocating get is used for all other cases. A
+ * non-existing context indicates an error. It acquires and releases
+ * the ds_lock itself for obtaining the context.
+ *
+ * A context and its DS configuration are allocated and deallocated
+ * together. A context always has a DS configuration of the
+ * appropriate size.
+ */
+static DEFINE_PER_CPU(struct ds_context *, system_context);
+
+#define this_system_context per_cpu(system_context, smp_processor_id())
+
+/*
+ * Returns the pointer to the parameter task's context or to the
+ * system-wide context, if task is NULL.
+ *
+ * Increases the use count of the returned context, if not NULL.
+ */
+static inline struct ds_context *ds_get_context(struct task_struct *task)
{
- (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
+ struct ds_context *context;
+
+ spin_lock(&ds_lock);
+
+ context = (task ? task->thread.ds_ctx : this_system_context);
+ if (context)
+ context->count++;
+
+ spin_unlock(&ds_lock);
+
+ return context;
}
-static inline unsigned long get_bts_absolute_maximum(char *base)
+
+/*
+ * Same as ds_get_context, but allocates the context and it's DS
+ * structure, if necessary; returns NULL; if out of memory.
+ *
+ * pre: requires ds_lock to be held
+ */
+static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
+ struct ds_context **p_context =
+ (task ? &task->thread.ds_ctx : &this_system_context);
+ struct ds_context *context = *p_context;
+
+ if (!context) {
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+
+ if (!context)
+ return NULL;
+
+ context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+ if (!context->ds) {
+ kfree(context);
+ return NULL;
+ }
+
+ *p_context = context;
+
+ context->this = p_context;
+ context->task = task;
+
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+
+ if (!task || (task == current))
+ wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+
+ get_tracer(task);
+ }
+
+ context->count++;
+
+ return context;
}
-static inline void set_bts_absolute_maximum(char *base, unsigned long value)
+
+/*
+ * Decreases the use count of the parameter context, if not NULL.
+ * Deallocates the context, if the use count reaches zero.
+ */
+static inline void ds_put_context(struct ds_context *context)
{
- (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+ if (!context)
+ return;
+
+ spin_lock(&ds_lock);
+
+ if (--context->count)
+ goto out;
+
+ *(context->this) = NULL;
+
+ if (context->task)
+ clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+
+ if (!context->task || (context->task == current))
+ wrmsrl(MSR_IA32_DS_AREA, 0);
+
+ put_tracer(context->task);
+
+ /* free any leftover buffers from tracers that did not
+ * deallocate them properly. */
+ kfree(context->buffer[ds_bts]);
+ kfree(context->buffer[ds_pebs]);
+ kfree(context->ds);
+ kfree(context);
+ out:
+ spin_unlock(&ds_lock);
}
-static inline unsigned long get_bts_interrupt_threshold(char *base)
+
+
+/*
+ * Handle a buffer overflow
+ *
+ * task: the task whose buffers are overflowing;
+ * NULL for a buffer overflow on the current cpu
+ * context: the ds context
+ * qual: the buffer type
+ */
+static void ds_overflow(struct task_struct *task, struct ds_context *context,
+ enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
+ if (!context)
+ return;
+
+ if (context->callback[qual])
+ (*context->callback[qual])(task);
+
+ /* todo: do some more overflow handling */
}
-static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
+
+
+/*
+ * Allocate a non-pageable buffer of the parameter size.
+ * Checks the memory and the locked memory rlimit.
+ *
+ * Returns the buffer, if successful;
+ * NULL, if out of memory or rlimit exceeded.
+ *
+ * size: the requested buffer size in bytes
+ * pages (out): if not NULL, contains the number of pages reserved
+ */
+static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
{
- (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+ unsigned long rlim, vm, pgsz;
+ void *buffer;
+
+ pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + pgsz;
+ if (rlim < vm)
+ return NULL;
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + pgsz;
+ if (rlim < vm)
+ return NULL;
+
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
+ return NULL;
+
+ current->mm->total_vm += pgsz;
+ current->mm->locked_vm += pgsz;
+
+ if (pages)
+ *pages = pgsz;
+
+ return buffer;
}
-static inline unsigned long get_from_ip(char *base)
+
+static int ds_request(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.from_ip.offset);
+ struct ds_context *context;
+ unsigned long buffer, adj;
+ const unsigned long alignment = (1 << 3);
+ int error = 0;
+
+ if (!ds_cfg.sizeof_ds)
+ return -EOPNOTSUPP;
+
+ /* we require some space to do alignment adjustments below */
+ if (size < (alignment + ds_cfg.sizeof_rec[qual]))
+ return -EINVAL;
+
+ /* buffer overflow notification is not yet implemented */
+ if (ovfl)
+ return -EOPNOTSUPP;
+
+
+ spin_lock(&ds_lock);
+
+ if (!check_tracer(task))
+ return -EPERM;
+
+ error = -ENOMEM;
+ context = ds_alloc_context(task);
+ if (!context)
+ goto out_unlock;
+
+ error = -EALREADY;
+ if (context->owner[qual] == current)
+ goto out_unlock;
+ error = -EPERM;
+ if (context->owner[qual] != NULL)
+ goto out_unlock;
+ context->owner[qual] = current;
+
+ spin_unlock(&ds_lock);
+
+
+ error = -ENOMEM;
+ if (!base) {
+ base = ds_allocate_buffer(size, &context->pages[qual]);
+ if (!base)
+ goto out_release;
+
+ context->buffer[qual] = base;
+ }
+ error = 0;
+
+ context->callback[qual] = ovfl;
+
+ /* adjust the buffer address and size to meet alignment
+ * constraints:
+ * - buffer is double-word aligned
+ * - size is multiple of record size
+ *
+ * We checked the size at the very beginning; we have enough
+ * space to do the adjustment.
+ */
+ buffer = (unsigned long)base;
+
+ adj = ALIGN(buffer, alignment) - buffer;
+ buffer += adj;
+ size -= adj;
+
+ size /= ds_cfg.sizeof_rec[qual];
+ size *= ds_cfg.sizeof_rec[qual];
+
+ ds_set(context->ds, qual, ds_buffer_base, buffer);
+ ds_set(context->ds, qual, ds_index, buffer);
+ ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+ if (ovfl) {
+ /* todo: select a suitable interrupt threshold */
+ } else
+ ds_set(context->ds, qual,
+ ds_interrupt_threshold, buffer + size + 1);
+
+ /* we keep the context until ds_release */
+ return error;
+
+ out_release:
+ context->owner[qual] = NULL;
+ ds_put_context(context);
+ return error;
+
+ out_unlock:
+ spin_unlock(&ds_lock);
+ ds_put_context(context);
+ return error;
}
-static inline void set_from_ip(char *base, unsigned long value)
+
+int ds_request_bts(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl)
{
- (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
+ return ds_request(task, base, size, ovfl, ds_bts);
}
-static inline unsigned long get_to_ip(char *base)
+
+int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl)
{
- return *(unsigned long *)(base + ds_cfg.to_ip.offset);
+ return ds_request(task, base, size, ovfl, ds_pebs);
}
-static inline void set_to_ip(char *base, unsigned long value)
+
+static int ds_release(struct task_struct *task, enum ds_qualifier qual)
{
- (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
+ struct ds_context *context;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ kfree(context->buffer[qual]);
+ context->buffer[qual] = NULL;
+
+ current->mm->total_vm -= context->pages[qual];
+ current->mm->locked_vm -= context->pages[qual];
+ context->pages[qual] = 0;
+ context->owner[qual] = NULL;
+
+ /*
+ * we put the context twice:
+ * once for the ds_get_context
+ * once for the corresponding ds_request
+ */
+ ds_put_context(context);
+ out:
+ ds_put_context(context);
+ return error;
}
-static inline unsigned char get_info_type(char *base)
+
+int ds_release_bts(struct task_struct *task)
{
- return *(unsigned char *)(base + ds_cfg.info_type.offset);
+ return ds_release(task, ds_bts);
}
-static inline void set_info_type(char *base, unsigned char value)
+
+int ds_release_pebs(struct task_struct *task)
{
- (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
+ return ds_release(task, ds_pebs);
}
-static inline unsigned long get_info_data(char *base)
+
+static int ds_get_index(struct task_struct *task, size_t *pos,
+ enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.info_data.offset);
+ struct ds_context *context;
+ unsigned long base, index;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ index = ds_get(context->ds, qual, ds_index);
+
+ error = ((index - base) / ds_cfg.sizeof_rec[qual]);
+ if (pos)
+ *pos = error;
+ out:
+ ds_put_context(context);
+ return error;
}
-static inline void set_info_data(char *base, unsigned long value)
+
+int ds_get_bts_index(struct task_struct *task, size_t *pos)
{
- (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
+ return ds_get_index(task, pos, ds_bts);
}
+int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+{
+ return ds_get_index(task, pos, ds_pebs);
+}
-int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+static int ds_get_end(struct task_struct *task, size_t *pos,
+ enum ds_qualifier qual)
{
- size_t bts_size_in_records;
- unsigned long bts;
- void *ds;
+ struct ds_context *context;
+ unsigned long base, end;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
+
+ error = ((end - base) / ds_cfg.sizeof_rec[qual]);
+ if (pos)
+ *pos = error;
+ out:
+ ds_put_context(context);
+ return error;
+}
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+int ds_get_bts_end(struct task_struct *task, size_t *pos)
+{
+ return ds_get_end(task, pos, ds_bts);
+}
- if (bts_size_in_bytes < 0)
- return -EINVAL;
+int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+{
+ return ds_get_end(task, pos, ds_pebs);
+}
- bts_size_in_records =
- bts_size_in_bytes / ds_cfg.sizeof_bts;
- bts_size_in_bytes =
- bts_size_in_records * ds_cfg.sizeof_bts;
+static int ds_access(struct task_struct *task, size_t index,
+ const void **record, enum ds_qualifier qual)
+{
+ struct ds_context *context;
+ unsigned long base, idx;
+ int error;
- if (bts_size_in_bytes <= 0)
+ if (!record)
return -EINVAL;
- bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
-
- if (!bts)
- return -ENOMEM;
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
- ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ idx = base + (index * ds_cfg.sizeof_rec[qual]);
- if (!ds) {
- kfree((void *)bts);
- return -ENOMEM;
- }
-
- set_bts_buffer_base(ds, bts);
- set_bts_index(ds, bts);
- set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
- set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+ error = -EINVAL;
+ if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
+ goto out;
- *dsp = ds;
- return 0;
+ *record = (const void *)idx;
+ error = ds_cfg.sizeof_rec[qual];
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_free(void **dsp)
+int ds_access_bts(struct task_struct *task, size_t index, const void **record)
{
- if (*dsp) {
- kfree((void *)get_bts_buffer_base(*dsp));
- kfree(*dsp);
- *dsp = NULL;
- }
- return 0;
+ return ds_access(task, index, record, ds_bts);
}
-int ds_get_bts_size(void *ds)
+int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
{
- int size_in_bytes;
-
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
-
- if (!ds)
- return 0;
-
- size_in_bytes =
- get_bts_absolute_maximum(ds) -
- get_bts_buffer_base(ds);
- return size_in_bytes;
+ return ds_access(task, index, record, ds_pebs);
}
-int ds_get_bts_end(void *ds)
+static int ds_write(struct task_struct *task, const void *record, size_t size,
+ enum ds_qualifier qual, int force)
{
- int size_in_bytes = ds_get_bts_size(ds);
-
- if (size_in_bytes <= 0)
- return size_in_bytes;
+ struct ds_context *context;
+ int error;
- return size_in_bytes / ds_cfg.sizeof_bts;
-}
+ if (!record)
+ return -EINVAL;
-int ds_get_bts_index(void *ds)
-{
- int index_offset_in_bytes;
+ error = -EPERM;
+ context = ds_get_context(task);
+ if (!context)
+ goto out;
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+ if (!force) {
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+ }
- index_offset_in_bytes =
- get_bts_index(ds) -
- get_bts_buffer_base(ds);
+ error = 0;
+ while (size) {
+ unsigned long base, index, end, write_end, int_th;
+ unsigned long write_size, adj_write_size;
+
+ /*
+ * write as much as possible without producing an
+ * overflow interrupt.
+ *
+ * interrupt_threshold must either be
+ * - bigger than absolute_maximum or
+ * - point to a record between buffer_base and absolute_maximum
+ *
+ * index points to a valid record.
+ */
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ index = ds_get(context->ds, qual, ds_index);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
+ int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+ write_end = min(end, int_th);
+
+ /* if we are already beyond the interrupt threshold,
+ * we fill the entire buffer */
+ if (write_end <= index)
+ write_end = end;
+
+ if (write_end <= index)
+ goto out;
+
+ write_size = min((unsigned long) size, write_end - index);
+ memcpy((void *)index, record, write_size);
+
+ record = (const char *)record + write_size;
+ size -= write_size;
+ error += write_size;
+
+ adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+ adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+ /* zero out trailing bytes */
+ memset((char *)index + write_size, 0,
+ adj_write_size - write_size);
+ index += adj_write_size;
+
+ if (index >= end)
+ index = base;
+ ds_set(context->ds, qual, ds_index, index);
+
+ if (index >= int_th)
+ ds_overflow(task, context, qual);
+ }
- return index_offset_in_bytes / ds_cfg.sizeof_bts;
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_set_overflow(void *ds, int method)
+int ds_write_bts(struct task_struct *task, const void *record, size_t size)
{
- switch (method) {
- case DS_O_SIGNAL:
- return -EOPNOTSUPP;
- case DS_O_WRAP:
- return 0;
- default:
- return -EINVAL;
- }
+ return ds_write(task, record, size, ds_bts, /* force = */ 0);
}
-int ds_get_overflow(void *ds)
+int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
{
- return DS_O_WRAP;
+ return ds_write(task, record, size, ds_pebs, /* force = */ 0);
}
-int ds_clear(void *ds)
+int ds_unchecked_write_bts(struct task_struct *task,
+ const void *record, size_t size)
{
- int bts_size = ds_get_bts_size(ds);
- unsigned long bts_base;
-
- if (bts_size <= 0)
- return bts_size;
-
- bts_base = get_bts_buffer_base(ds);
- memset((void *)bts_base, 0, bts_size);
-
- set_bts_index(ds, bts_base);
- return 0;
+ return ds_write(task, record, size, ds_bts, /* force = */ 1);
}
-int ds_read_bts(void *ds, int index, struct bts_struct *out)
+int ds_unchecked_write_pebs(struct task_struct *task,
+ const void *record, size_t size)
{
- void *bts;
+ return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+}
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+static int ds_reset_or_clear(struct task_struct *task,
+ enum ds_qualifier qual, int clear)
+{
+ struct ds_context *context;
+ unsigned long base, end;
+ int error;
- if (index < 0)
- return -EINVAL;
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
- if (index >= ds_get_bts_size(ds))
- return -EINVAL;
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
- bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
+ if (clear)
+ memset((void *)base, 0, end - base);
- memset(out, 0, sizeof(*out));
- if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
- out->qualifier = get_info_type(bts);
- out->variant.jiffies = get_info_data(bts);
- } else {
- out->qualifier = BTS_BRANCH;
- out->variant.lbr.from_ip = get_from_ip(bts);
- out->variant.lbr.to_ip = get_to_ip(bts);
- }
+ ds_set(context->ds, qual, ds_index, base);
- return sizeof(*out);;
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_write_bts(void *ds, const struct bts_struct *in)
+int ds_reset_bts(struct task_struct *task)
{
- unsigned long bts;
-
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
-
- if (ds_get_bts_size(ds) <= 0)
- return -ENXIO;
+ return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+}
- bts = get_bts_index(ds);
+int ds_reset_pebs(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+}
- memset((void *)bts, 0, ds_cfg.sizeof_bts);
- switch (in->qualifier) {
- case BTS_INVALID:
- break;
+int ds_clear_bts(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+}
- case BTS_BRANCH:
- set_from_ip((void *)bts, in->variant.lbr.from_ip);
- set_to_ip((void *)bts, in->variant.lbr.to_ip);
- break;
+int ds_clear_pebs(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+}
- case BTS_TASK_ARRIVES:
- case BTS_TASK_DEPARTS:
- set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
- set_info_type((void *)bts, in->qualifier);
- set_info_data((void *)bts, in->variant.jiffies);
- break;
+int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+{
+ struct ds_context *context;
+ int error;
- default:
+ if (!value)
return -EINVAL;
- }
- bts = bts + ds_cfg.sizeof_bts;
- if (bts >= get_bts_absolute_maximum(ds))
- bts = get_bts_buffer_base(ds);
- set_bts_index(ds, bts);
+ context = ds_get_context(task);
+ error = ds_validate_access(context, ds_pebs);
+ if (error < 0)
+ goto out;
- return ds_cfg.sizeof_bts;
+ *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
}
-unsigned long ds_debugctl_mask(void)
+int ds_set_pebs_reset(struct task_struct *task, u64 value)
{
- return ds_cfg.debugctl_mask;
-}
+ struct ds_context *context;
+ int error;
-#ifdef __i386__
-static const struct ds_configuration ds_cfg_netburst = {
- .sizeof_ds = 9 * 4,
- .bts_buffer_base = { 0, 4 },
- .bts_index = { 4, 4 },
- .bts_absolute_maximum = { 8, 4 },
- .bts_interrupt_threshold = { 12, 4 },
- .sizeof_bts = 3 * 4,
- .from_ip = { 0, 4 },
- .to_ip = { 4, 4 },
- .info_type = { 4, 1 },
- .info_data = { 8, 4 },
- .debugctl_mask = (1<<2)|(1<<3)
-};
+ context = ds_get_context(task);
+ error = ds_validate_access(context, ds_pebs);
+ if (error < 0)
+ goto out;
-static const struct ds_configuration ds_cfg_pentium_m = {
- .sizeof_ds = 9 * 4,
- .bts_buffer_base = { 0, 4 },
- .bts_index = { 4, 4 },
- .bts_absolute_maximum = { 8, 4 },
- .bts_interrupt_threshold = { 12, 4 },
- .sizeof_bts = 3 * 4,
- .from_ip = { 0, 4 },
- .to_ip = { 4, 4 },
- .info_type = { 4, 1 },
- .info_data = { 8, 4 },
- .debugctl_mask = (1<<6)|(1<<7)
+ *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
+}
+
+static const struct ds_configuration ds_cfg_var = {
+ .sizeof_ds = sizeof(long) * 12,
+ .sizeof_field = sizeof(long),
+ .sizeof_rec[ds_bts] = sizeof(long) * 3,
+ .sizeof_rec[ds_pebs] = sizeof(long) * 10
};
-#endif /* _i386_ */
-
-static const struct ds_configuration ds_cfg_core2 = {
- .sizeof_ds = 9 * 8,
- .bts_buffer_base = { 0, 8 },
- .bts_index = { 8, 8 },
- .bts_absolute_maximum = { 16, 8 },
- .bts_interrupt_threshold = { 24, 8 },
- .sizeof_bts = 3 * 8,
- .from_ip = { 0, 8 },
- .to_ip = { 8, 8 },
- .info_type = { 8, 1 },
- .info_data = { 16, 8 },
- .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+static const struct ds_configuration ds_cfg_64 = {
+ .sizeof_ds = 8 * 12,
+ .sizeof_field = 8,
+ .sizeof_rec[ds_bts] = 8 * 3,
+ .sizeof_rec[ds_pebs] = 8 * 10
};
static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
-#ifdef __i386__
case 0xD:
case 0xE: /* Pentium M */
- ds_configure(&ds_cfg_pentium_m);
+ ds_configure(&ds_cfg_var);
break;
-#endif /* _i386_ */
case 0xF: /* Core2 */
- ds_configure(&ds_cfg_core2);
+ case 0x1C: /* Atom */
+ ds_configure(&ds_cfg_64);
break;
default:
/* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
case 0xF:
switch (c->x86_model) {
-#ifdef __i386__
case 0x0:
case 0x1:
case 0x2: /* Netburst */
- ds_configure(&ds_cfg_netburst);
+ ds_configure(&ds_cfg_var);
break;
-#endif /* _i386_ */
default:
/* sorry, don't know about them */
break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
}
}
+
+void ds_free(struct ds_context *context)
+{
+ /* This is called when the task owning the parameter context
+ * is dying. There should not be any user of that context left
+ * to disturb us, anymore. */
+ unsigned long leftovers = context->count;
+ while (leftovers--)
+ ds_put_context(context);
+}
+#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9af89078f7b..66e48aa2dd1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p)
if (!p)
return -EINVAL;
- if (!strcmp(p, "exactmap")) {
+ if (!strncmp(p, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
/*
* If we are doing a crash dump, we still need to know
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 4353cf5e6fa..24bb5faf5ef 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func)
}
+#ifdef CONFIG_DMAR
+static void __init intel_g33_dmar(int num, int slot, int func)
+{
+ struct acpi_table_header *dmar_tbl;
+ acpi_status status;
+
+ status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
+ if (ACPI_SUCCESS(status)) {
+ printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
+ dmar_disabled = 1;
+ }
+}
+#endif
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
+#ifdef CONFIG_DMAR
+ { PCI_VENDOR_ID_INTEL, 0x29c0,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
+#endif
{}
};
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 06cc8d4254b..945a31cdd81 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -414,9 +414,11 @@ void __init efi_init(void)
if (memmap.map == NULL)
printk(KERN_ERR "Could not map the EFI memory map!\n");
memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
if (memmap.desc_size != sizeof(efi_memory_desc_t))
- printk(KERN_WARNING "Kernel-defined memdesc"
- "doesn't match the one from EFI!\n");
+ printk(KERN_WARNING
+ "Kernel-defined memdesc doesn't match the one from EFI!\n");
+
if (add_efi_memmap)
do_add_efi_memmap();
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 4b63c8e1f13..5cab48ee61a 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void)
* directory. If I have PAE, I just need to duplicate one entry in
* page directory.
*/
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
efi_bak_pg_dir_pointer[0].pgd =
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void)
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 16a93ed7baf..ae2ffc8a400 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -286,7 +286,7 @@ static __init void map_low_mmrs(void)
enum map_type {map_wb, map_uc};
-static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
{
unsigned long bytes, paddr;
@@ -357,7 +357,9 @@ static __init void uv_rtc_init(void)
sn_rtc_cycles_per_second = ticks_per_sec;
}
-static __init void uv_system_init(void)
+static bool uv_system_inited;
+
+void __init uv_system_init(void)
{
union uvh_si_addr_map_config_u m_n_config;
union uvh_node_id_u node_id;
@@ -447,6 +449,7 @@ static __init void uv_system_init(void)
map_mmr_high(max_pnode);
map_config_high(max_pnode);
map_mmioh_high(max_pnode);
+ uv_system_inited = true;
}
/*
@@ -455,8 +458,7 @@ static __init void uv_system_init(void)
*/
void __cpuinit uv_cpu_init(void)
{
- if (!uv_node_to_blade)
- uv_system_init();
+ BUG_ON(!uv_system_inited);
uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1b318e903bf..d16084f9064 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
@@ -107,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
}
load_idt((const struct desc_ptr *)&idt_descr);
- early_printk("Kernel alive\n");
+ if (console_loglevel == 10)
+ early_printk("Kernel alive\n");
x86_64_init_pda();
- early_printk("Kernel really alive\n");
-
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index a7010c3a377..e835b4eea70 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4
*
* Note that the stack is not yet set up!
*/
-#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
-#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
-#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
-
default_entry:
#ifdef CONFIG_X86_PAE
@@ -196,9 +192,9 @@ default_entry:
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_pmd), %edx
- movl $PTE_ATTR, %eax
+ movl $PTE_IDENT_ATTR, %eax
10:
- leal PDE_ATTR(%edi),%ecx /* Create PMD entry */
+ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
movl %ecx,(%edx) /* Store PMD entry */
/* Upper half already zero */
addl $8,%edx
@@ -215,7 +211,7 @@ default_entry:
* End condition: we must map up to and including INIT_MAP_BEYOND_END
* bytes beyond the end of our own page tables.
*/
- leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+ leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
1:
@@ -224,7 +220,7 @@ default_entry:
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
- movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
#else /* Not PAE */
@@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_dir), %edx
- movl $PTE_ATTR, %eax
+ movl $PTE_IDENT_ATTR, %eax
10:
- leal PDE_ATTR(%edi),%ecx /* Create PDE entry */
+ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
addl $4,%edx
@@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
* bytes beyond the end of our own page tables; the +0x007 is
* the attribute bits
*/
- leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+ leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
movl %edi,pa(init_pg_tables_end)
@@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
- movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_dir+0xffc)
#endif
jmp 3f
@@ -634,19 +630,19 @@ ENTRY(empty_zero_page)
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
- .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
- .long pa(swapper_pg_pmd+PGD_ATTR),0
- .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
- .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0
# elif KPMDS == 2
.long 0,0
- .long pa(swapper_pg_pmd+PGD_ATTR),0
- .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
# elif KPMDS == 1
.long 0,0
.long 0,0
- .long pa(swapper_pg_pmd+PGD_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index db3280afe88..26cfdc1d7c7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -110,7 +110,7 @@ startup_64:
movq %rdi, %rax
shrq $PMD_SHIFT, %rax
andq $(PTRS_PER_PMD - 1), %rax
- leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+ leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
leaq level2_spare_pgt(%rip), %rbx
movq %rdx, 0(%rbx, %rax, 8)
ident_complete:
@@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
- PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad2b15a1334..73deaffadd0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void)
/* Calculate the min / max delta */
hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
&hpet_clockevent);
- hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
- &hpet_clockevent);
+ /* 5 usec minimum reprogramming delta. */
+ hpet_clockevent.min_delta_ns = 5000;
/*
* Start hpet with the boot cpu mask and make it
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
}
static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
+ struct clock_event_device *evt)
{
- unsigned long cnt;
+ u32 cnt;
cnt = hpet_readl(HPET_COUNTER);
- cnt += delta;
+ cnt += (u32) delta;
hpet_writel(cnt, HPET_T0_CMP);
- return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0;
+ /*
+ * We need to read back the CMP register to make sure that
+ * what we wrote hit the chip before we compare it to the
+ * counter.
+ */
+ WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+
+ return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
/*
@@ -359,6 +366,7 @@ static int hpet_clocksource_register(void)
int __init hpet_enable(void)
{
unsigned long id;
+ int i;
if (!is_hpet_capable())
return 0;
@@ -369,6 +377,29 @@ int __init hpet_enable(void)
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
+
+ /*
+ * AMD SB700 based systems with spread spectrum enabled use a
+ * SMM based HPET emulation to provide proper frequency
+ * setting. The SMM code is initialized with the first HPET
+ * register access and takes some time to complete. During
+ * this time the config register reads 0xffffffff. We check
+ * for max. 1000 loops whether the config register reads a non
+ * 0xffffffff value to make sure that HPET is up and running
+ * before we go further. A counting loop is safe, as the HPET
+ * access takes thousands of CPU cycles. On non SB700 based
+ * machines this check is only done once and has no side
+ * effects.
+ */
+ for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
+ if (i == 1000) {
+ printk(KERN_WARNING
+ "HPET config register value = 0xFFFFFFFF. "
+ "Disabling HPET\n");
+ goto out_nohpet;
+ }
+ }
+
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 1c3a66a67f8..720d2607aac 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "30BF")
}
},
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "Presario F700",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30D3")
+ }
+ },
{ }
};
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1cf8c1fcc08..b71e02d42f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -325,7 +325,7 @@ skip:
for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).irq_call_count);
- seq_printf(p, " function call interrupts\n");
+ seq_printf(p, " Function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1f78b238d8d..f065fe9071b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -129,7 +129,7 @@ skip:
seq_printf(p, "CAL: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
- seq_printf(p, " function call interrupts\n");
+ seq_printf(p, " Function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index 7377ccb2133..304d8bad655 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges);
static u32 *flush_words;
struct pci_device_id k8_nb_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
{}
};
EXPORT_SYMBOL(k8_nb_ids);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index f2d43bc7551..ff7d3b0124f 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
if (PageHighMem(pg)) {
data = ioremap_cache(pa_data, sizeof(*data));
if (!data) {
+ kfree(node);
error = -ENXIO;
goto err_dir;
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f47f0eb886b..10435a120d2 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1;
*/
void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
gdb_regs[GDB_AX] = regs->ax;
gdb_regs[GDB_BX] = regs->bx;
gdb_regs[GDB_CX] = regs->cx;
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SI] = regs->si;
gdb_regs[GDB_DI] = regs->di;
gdb_regs[GDB_BP] = regs->bp;
- gdb_regs[GDB_PS] = regs->flags;
gdb_regs[GDB_PC] = regs->ip;
#ifdef CONFIG_X86_32
+ gdb_regs[GDB_PS] = regs->flags;
gdb_regs[GDB_DS] = regs->ds;
gdb_regs[GDB_ES] = regs->es;
gdb_regs[GDB_CS] = regs->cs;
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_R13] = regs->r13;
gdb_regs[GDB_R14] = regs->r14;
gdb_regs[GDB_R15] = regs->r15;
+ gdb_regs32[GDB_PS] = regs->flags;
+ gdb_regs32[GDB_CS] = regs->cs;
+ gdb_regs32[GDB_SS] = regs->ss;
#endif
gdb_regs[GDB_SP] = regs->sp;
}
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
*/
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
gdb_regs[GDB_AX] = 0;
gdb_regs[GDB_BX] = 0;
gdb_regs[GDB_CX] = 0;
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
#else
- gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
- gdb_regs[GDB_PC] = 0;
+ gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
+ gdb_regs32[GDB_CS] = __KERNEL_CS;
+ gdb_regs32[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_PC] = p->thread.ip;
gdb_regs[GDB_R8] = 0;
gdb_regs[GDB_R9] = 0;
gdb_regs[GDB_R10] = 0;
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
*/
void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
regs->ax = gdb_regs[GDB_AX];
regs->bx = gdb_regs[GDB_BX];
regs->cx = gdb_regs[GDB_CX];
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
regs->si = gdb_regs[GDB_SI];
regs->di = gdb_regs[GDB_DI];
regs->bp = gdb_regs[GDB_BP];
- regs->flags = gdb_regs[GDB_PS];
regs->ip = gdb_regs[GDB_PC];
#ifdef CONFIG_X86_32
+ regs->flags = gdb_regs[GDB_PS];
regs->ds = gdb_regs[GDB_DS];
regs->es = gdb_regs[GDB_ES];
regs->cs = gdb_regs[GDB_CS];
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
regs->r13 = gdb_regs[GDB_R13];
regs->r14 = gdb_regs[GDB_R14];
regs->r15 = gdb_regs[GDB_R15];
+ regs->flags = gdb_regs32[GDB_PS];
+ regs->cs = gdb_regs32[GDB_CS];
+ regs->ss = gdb_regs32[GDB_SS];
#endif
}
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
if (remcomInBuffer[0] == 's') {
linux_regs->flags |= X86_EFLAGS_TF;
kgdb_single_step = 1;
- if (kgdb_contthread) {
- atomic_set(&kgdb_cpu_doing_single_step,
- raw_smp_processor_id());
- }
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
}
get_debugreg(dr6, 6);
@@ -440,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
return NOTIFY_DONE;
case DIE_NMI_IPI:
- if (atomic_read(&kgdb_active) != -1) {
- /* KGDB CPU roundup */
- kgdb_nmicallback(raw_smp_processor_id(), regs);
- was_in_debug_nmi[raw_smp_processor_id()] = 1;
- touch_nmi_watchdog();
- }
+ /* Just ignore, we will handle the roundup on DIE_NMI. */
return NOTIFY_DONE;
case DIE_NMIUNKNOWN:
@@ -466,9 +476,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) ==
- raw_smp_processor_id() &&
- user_mode(regs))
- return single_step_cont(regs, args);
+ raw_smp_processor_id()) {
+ if (user_mode(regs))
+ return single_step_cont(regs, args);
+ break;
+ } else if (test_thread_flag(TIF_SINGLESTEP))
+ /* This means a user thread is single stepping
+ * a system call which should be ignored
+ */
+ return NOTIFY_DONE;
/* fall through */
default:
if (user_mode(regs))
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8b7a3cf37d2..478bca986ec 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -178,7 +178,7 @@ static void kvm_flush_tlb(void)
kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
}
-static void kvm_release_pt(u32 pfn)
+static void kvm_release_pt(unsigned long pfn)
{
struct kvm_mmu_op_release_pt rpt = {
.header.op = KVM_MMU_OP_RELEASE_PT,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 9fe478d9840..0732adba05c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -78,7 +79,7 @@ static void load_segments(void)
/*
* A architecture hook called to validate the
* proposed image and prepare the control pages
- * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
* have been allocated, but the segments have yet
* been copied into the kernel.
*
@@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
asmlinkage unsigned long
(*relocate_kernel_ptr)(unsigned long indirection_page,
unsigned long control_page,
@@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image)
unsigned int has_pae,
unsigned int preserve_context);
- tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
+
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
@@ -138,7 +145,7 @@ void machine_kexec(struct kimage *image)
}
control_page = page_address(image->control_code_page);
- memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
@@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image)
(unsigned long)page_list,
image->start, cpu_has_pae,
image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 07c0f828f48..3b599518c32 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -33,6 +33,8 @@
#include <linux/module.h>
#include <asm/geode.h>
+#define MFGPT_DEFAULT_IRQ 7
+
static struct mfgpt_timer_t {
unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
}
EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
-int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
+int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
{
- u32 val, dummy;
- int offset;
+ u32 zsel, lpc, dummy;
+ int shift;
if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
return -EIO;
- if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ /*
+ * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+ * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+ * 2, and we mustn't use nor change it.
+ * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+ * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+ * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+ */
+ rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+ shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
+ if (((zsel >> shift) & 0xF) == 2)
return -EIO;
- rdmsr(MSR_PIC_ZSEL_LOW, val, dummy);
+ /* Choose IRQ: if none supplied, keep IRQ already set or use default */
+ if (!*irq)
+ *irq = (zsel >> shift) & 0xF;
+ if (!*irq)
+ *irq = MFGPT_DEFAULT_IRQ;
- offset = (timer % 4) * 4;
-
- val &= ~((0xF << offset) | (0xF << (offset + 16)));
+ /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+ if (*irq < 1 || *irq == 2 || *irq > 15)
+ return -EIO;
+ rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+ if (lpc & (1 << *irq))
+ return -EIO;
+ /* All chosen and checked - go for it */
+ if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ return -EIO;
if (enable) {
- val |= (irq & 0x0F) << (offset);
- val |= (irq & 0x0F) << (offset + 16);
+ zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+ wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
}
- wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
return 0;
}
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
static u16 mfgpt_event_clock;
-static int irq = 7;
+static int irq;
static int __init mfgpt_setup(char *str)
{
get_option(&str, &irq);
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void)
mfgpt_event_clock = timer;
/* Set up the IRQ on the MFGPT side */
- if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) {
+ if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
return -EIO;
}
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
- "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
+ "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
+ timer, irq);
clockevents_register_device(&mfgpt_clockevent);
return 0;
err:
- geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq);
+ geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
printk(KERN_ERR
"mfgpt-timer: Unable to set up the MFGPT clock source\n");
return -EIO;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index fdfdc550b36..efc2f361fe8 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
{}
};
-void __init check_enable_amd_mmconf_dmi(void)
+void __cpuinit check_enable_amd_mmconf_dmi(void)
{
dmi_check_system(mmconf_dmi_table);
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e5d23675bb7..f98f4e1dba0 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
{
int apicid;
char *bootup_cpu = "";
@@ -486,7 +486,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
}
-static void construct_ioapic_table(int mpc_default_type)
+static void __init construct_ioapic_table(int mpc_default_type)
{
struct mpc_config_ioapic ioapic;
struct mpc_config_bus bus;
@@ -531,7 +531,7 @@ static void construct_ioapic_table(int mpc_default_type)
construct_default_ioirq_mptable(mpc_default_type);
}
#else
-static inline void construct_ioapic_table(int mpc_default_type) { }
+static inline void __init construct_ioapic_table(int mpc_default_type) { }
#endif
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9fd80955244..2e2af5d1819 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf,
u32 data[2];
u32 reg = *ppos;
int cpu = iminor(file->f_path.dentry->d_inode);
- int err;
+ int err = 0;
+ ssize_t bytes = 0;
if (count % 8)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 8) {
err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
- if (err)
- return -EIO;
- if (copy_to_user(tmp, &data, 8))
- return -EFAULT;
+ if (err) {
+ if (err == -EFAULT) /* Fix idiotic error code */
+ err = -EIO;
+ break;
+ }
+ if (copy_to_user(tmp, &data, 8)) {
+ err = -EFAULT;
+ break;
+ }
tmp += 2;
+ bytes += 8;
}
- return ((char __user *)tmp) - buf;
+ return bytes ? bytes : err;
}
static ssize_t msr_write(struct file *file, const char __user *buf,
@@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
u32 data[2];
u32 reg = *ppos;
int cpu = iminor(file->f_path.dentry->d_inode);
- int err;
+ int err = 0;
+ ssize_t bytes = 0;
if (count % 8)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 8) {
- if (copy_from_user(&data, tmp, 8))
- return -EFAULT;
+ if (copy_from_user(&data, tmp, 8)) {
+ err = -EFAULT;
+ break;
+ }
err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
- if (err)
- return -EIO;
+ if (err) {
+ if (err == -EFAULT) /* Fix idiotic error code */
+ err = -EIO;
+ break;
+ }
tmp += 2;
+ bytes += 8;
}
- return ((char __user *)tmp) - buf;
+ return bytes ? bytes : err;
}
static int msr_open(struct inode *inode, struct file *file)
@@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file)
ret = -EIO; /* MSR not supported */
out:
unlock_kernel();
- return 0;
+ return ret;
}
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ac6d51222e7..2c97f07f1c2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+ printk(KERN_CONT "\n");
+
+ printk(KERN_WARNING
+ "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+ cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+ printk(KERN_WARNING
+ "Please report this to bugzilla.kernel.org,\n");
+ printk(KERN_WARNING
+ "and attach the output of the 'dmesg' command.\n");
+
+ per_cpu(wd_enabled, cpu) = 0;
+ atomic_dec(&nmi_active);
+}
+
int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void)
for_each_online_cpu(cpu) {
if (!per_cpu(wd_enabled, cpu))
continue;
- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- printk(KERN_WARNING "WARNING: CPU#%d: NMI "
- "appears to be stuck (%d->%d)!\n",
- cpu,
- prev_nmi_count[cpu],
- get_nmi_count(cpu));
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
- }
+ if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+ report_broken_nmi(cpu, prev_nmi_count);
}
endflag = 1;
if (!atomic_read(&nmi_active)) {
@@ -289,6 +299,15 @@ void acpi_nmi_disable(void)
on_each_cpu(__acpi_nmi_disable, NULL, 1);
}
+/*
+ * This function is called as soon the LAPIC NMI watchdog driver has everything
+ * in place and it's ready to check if the NMIs belong to the NMI watchdog
+ */
+void cpu_nmi_set_wd_enabled(void)
+{
+ __get_cpu_var(wd_enabled) = 1;
+}
+
void setup_apic_nmi_watchdog(void *unused)
{
if (__get_cpu_var(wd_enabled))
@@ -301,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused)
switch (nmi_watchdog) {
case NMI_LOCAL_APIC:
- /* enable it before to avoid race with handler */
- __get_cpu_var(wd_enabled) = 1;
if (lapic_watchdog_init(nmi_hz) < 0) {
__get_cpu_var(wd_enabled) = 0;
return;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 2434467ddf7..4caff39078e 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -73,7 +73,7 @@ static void __init smp_dump_qct(void)
}
-void __init numaq_tsc_disable(void)
+void __cpuinit numaq_tsc_disable(void)
{
if (!found_numaq)
return;
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 3e667227480..7a13fac63a1 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd);
static void __init platform_detect(void)
{
size_t propsize;
- u32 rev;
+ __be32 rev;
if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
&propsize) || propsize != 4) {
printk(KERN_ERR "ofw: getprop call failed!\n");
- rev = 0;
+ rev = cpu_to_be32(0);
}
olpc_platform_info.boardrev = be32_to_cpu(rev);
}
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
static void __init platform_detect(void)
{
/* stopgap until OFW support is added to the kernel */
- olpc_platform_info.boardrev = be32_to_cpu(0xc2);
+ olpc_platform_info.boardrev = 0xc2;
}
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 5744789a78f..6b0bb73998d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = {
#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
+ .read_msr_amd = native_read_msr_amd_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
@@ -469,7 +470,7 @@ struct pv_lock_ops pv_lock_ops = {
.spin_unlock = __ticket_spin_unlock,
#endif
};
-EXPORT_SYMBOL_GPL(pv_lock_ops);
+EXPORT_SYMBOL(pv_lock_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 58262218781..9fe644f4861 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
start = start_##ops##_##x; \
end = end_##ops##_##x; \
goto patch_site
- switch(type) {
+ switch (type) {
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, restore_fl);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 02d19328525..080d1d27f37 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
badbit, tbl, start_addr, npages);
}
- set_bit_string(tbl->it_map, index, npages);
+ iommu_area_reserve(tbl->it_map, index, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
@@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
/* were we called with bad_dma_address? */
badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
- printk(KERN_ERR "Calgary: driver tried unmapping bad DMA "
+ WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
- WARN_ON(1);
return;
}
@@ -492,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
if (!ret)
@@ -511,8 +512,22 @@ error:
return ret;
}
+static void calgary_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ unsigned int npages;
+ struct iommu_table *tbl = find_iommu_table(dev);
+
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+
+ iommu_free(tbl, dma_handle, npages);
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
static struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
+ .free_coherent = calgary_free_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
.map_sg = calgary_map_sg,
@@ -1269,13 +1284,15 @@ static inline int __init determine_tce_table_size(u64 ram)
static int __init build_detail_arrays(void)
{
unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
+ unsigned numnodes, i;
+ int scal_detail_size, rio_detail_size;
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+ numnodes = rio_table_hdr->num_scal_dev;
+ if (numnodes > MAX_NUMNODES){
printk(KERN_WARNING
"Calgary: MAX_NUMNODES too low! Defined as %d, "
"but system has %d nodes.\n",
- MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ MAX_NUMNODES, numnodes);
return -ENODEV;
}
@@ -1296,8 +1313,7 @@ static int __init build_detail_arrays(void)
}
ptr = ((unsigned long)rio_table_hdr) + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev;
- i++, ptr += scal_detail_size)
+ for (i = 0; i < numnodes; i++, ptr += scal_detail_size)
scal_devs[i] = (struct scal_detail *)ptr;
for (i = 0; i < rio_table_hdr->num_rio_dev;
@@ -1350,7 +1366,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
* Function for kdump case. Get the tce tables from first kernel
* by reading the contents of the base adress register of calgary iommu
*/
-static void get_tce_space_from_tar(void)
+static void __init get_tce_space_from_tar(void)
{
int bus;
void __iomem *target;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 87d4d6964ec..0a3824e837b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address);
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
to older i386. */
-struct device fallback_dev = {
+struct device x86_dma_fallback_dev = {
.bus_id = "fallback device",
.coherent_dma_mask = DMA_32BIT_MASK,
- .dma_mask = &fallback_dev.coherent_dma_mask,
+ .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
+EXPORT_SYMBOL(x86_dma_fallback_dev);
int dma_set_mask(struct device *dev, u64 mask)
{
@@ -82,7 +83,7 @@ void __init dma32_reserve_bootmem(void)
* using 512M as goal
*/
align = 64ULL<<20;
- size = round_up(dma32_bootmem_size, align);
+ size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
if (dma32_bootmem_ptr)
@@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
EXPORT_SYMBOL(iommu_num_pages);
#endif
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag)
+{
+ unsigned long dma_mask;
+ struct page *page;
+ dma_addr_t addr;
+
+ dma_mask = dma_alloc_coherent_mask(dev, flag);
+
+ flag |= __GFP_ZERO;
+again:
+ page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
+ if (!page)
+ return NULL;
+
+ addr = page_to_phys(page);
+ if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+ __free_pages(page, get_order(size));
+
+ if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
+ flag = (flag & ~GFP_DMA32) | GFP_DMA;
+ goto again;
+ }
+
+ return NULL;
+ }
+
+ *dma_addr = addr;
+ return page_address(page);
+}
+
/*
* See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
* documentation.
@@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_supported);
-/* Allocate DMA memory on node near device */
-static noinline struct page *
-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
-{
- int node;
-
- node = dev_to_node(dev);
-
- return alloc_pages_node(node, gfp, order);
-}
-
-/*
- * Allocate memory for a coherent mapping.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp)
-{
- struct dma_mapping_ops *ops = get_dma_ops(dev);
- void *memory = NULL;
- struct page *page;
- unsigned long dma_mask = 0;
- dma_addr_t bus;
- int noretry = 0;
-
- /* ignore region specifiers */
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
- return memory;
-
- if (!dev) {
- dev = &fallback_dev;
- gfp |= GFP_DMA;
- }
- dma_mask = dev->coherent_dma_mask;
- if (dma_mask == 0)
- dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
-
- /* Device not DMA able */
- if (dev->dma_mask == NULL)
- return NULL;
-
- /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
- if (gfp & __GFP_DMA)
- noretry = 1;
-
-#ifdef CONFIG_X86_64
- /* Why <=? Even when the mask is smaller than 4GB it is often
- larger than 16MB and in this case we have a chance of
- finding fitting memory in the next higher zone first. If
- not retry with true GFP_DMA. -AK */
- if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
- gfp |= GFP_DMA32;
- if (dma_mask < DMA_32BIT_MASK)
- noretry = 1;
- }
-#endif
-
- again:
- page = dma_alloc_pages(dev,
- noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
- if (page == NULL)
- return NULL;
-
- {
- int high, mmu;
- bus = page_to_phys(page);
- memory = page_address(page);
- high = (bus + size) >= dma_mask;
- mmu = high;
- if (force_iommu && !(gfp & GFP_DMA))
- mmu = 1;
- else if (high) {
- free_pages((unsigned long)memory,
- get_order(size));
-
- /* Don't use the 16MB ZONE_DMA unless absolutely
- needed. It's better to use remapping first. */
- if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
- gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- /* Let low level make its own zone decisions */
- gfp &= ~(GFP_DMA32|GFP_DMA);
-
- if (ops->alloc_coherent)
- return ops->alloc_coherent(dev, size,
- dma_handle, gfp);
- return NULL;
- }
-
- memset(memory, 0, size);
- if (!mmu) {
- *dma_handle = bus;
- return memory;
- }
- }
-
- if (ops->alloc_coherent) {
- free_pages((unsigned long)memory, get_order(size));
- gfp &= ~(GFP_DMA|GFP_DMA32);
- return ops->alloc_coherent(dev, size, dma_handle, gfp);
- }
-
- if (ops->map_simple) {
- *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
- size,
- PCI_DMA_BIDIRECTIONAL);
- if (*dma_handle != bad_dma_address)
- return memory;
- }
-
- if (panic_on_overflow)
- panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
- (unsigned long)size);
- free_pages((unsigned long)memory, get_order(size));
- return NULL;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-/*
- * Unmap coherent memory.
- * The caller must ensure that the device has finished accessing the mapping.
- */
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus)
-{
- struct dma_mapping_ops *ops = get_dma_ops(dev);
-
- int order = get_order(size);
- WARN_ON(irqs_disabled()); /* for portability */
- if (dma_release_from_coherent(dev, order, vaddr))
- return;
- if (ops->unmap_single)
- ops->unmap_single(dev, bus, size, 0);
- free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
static int __init pci_iommu_init(void)
{
calgary_iommu_init();
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 49285f8fd4d..145f1c83369 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,8 +27,8 @@
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
+#include <linux/io.h>
#include <asm/atomic.h>
-#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -80,9 +80,10 @@ AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
-static int need_flush; /* global flush state. set for each gart wrap */
+static bool need_flush; /* global flush state. set for each gart wrap */
-static unsigned long alloc_iommu(struct device *dev, int size)
+static unsigned long alloc_iommu(struct device *dev, int size,
+ unsigned long align_mask)
{
unsigned long offset, flags;
unsigned long boundary_size;
@@ -90,26 +91,27 @@ static unsigned long alloc_iommu(struct device *dev, int size)
base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
PAGE_SIZE) >> PAGE_SHIFT;
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
- size, base_index, boundary_size, 0);
+ size, base_index, boundary_size, align_mask);
if (offset == -1) {
- need_flush = 1;
+ need_flush = true;
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
- size, base_index, boundary_size, 0);
+ size, base_index, boundary_size,
+ align_mask);
}
if (offset != -1) {
next_bit = offset+size;
if (next_bit >= iommu_pages) {
next_bit = 0;
- need_flush = 1;
+ need_flush = true;
}
}
if (iommu_fullflush)
- need_flush = 1;
+ need_flush = true;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
return offset;
@@ -134,7 +136,7 @@ static void flush_gart(void)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
k8_flush_garts();
- need_flush = 0;
+ need_flush = false;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -173,7 +175,8 @@ static void dump_leak(void)
iommu_leak_pages);
for (i = 0; i < iommu_leak_pages; i += 2) {
printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+ printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
+ 0);
printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
}
printk(KERN_DEBUG "\n");
@@ -212,34 +215,24 @@ static void iommu_full(struct device *dev, size_t size, int dir)
static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
- u64 mask = *dev->dma_mask;
- int high = addr + size > mask;
- int mmu = high;
-
- if (force_iommu)
- mmu = 1;
-
- return mmu;
+ return force_iommu ||
+ !is_buffer_dma_capable(*dev->dma_mask, addr, size);
}
static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
- u64 mask = *dev->dma_mask;
- int high = addr + size > mask;
- int mmu = high;
-
- return mmu;
+ return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
}
/* Map a single continuous physical area into the IOMMU.
* Caller needs to check if the iommu is needed and flush.
*/
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- size_t size, int dir)
+ size_t size, int dir, unsigned long align_mask)
{
unsigned long npages = iommu_num_pages(phys_mem, size);
- unsigned long iommu_page = alloc_iommu(dev, npages);
+ unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
int i;
if (iommu_page == -1) {
@@ -259,16 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
-static dma_addr_t
-gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
-{
- dma_addr_t map = dma_map_area(dev, paddr, size, dir);
-
- flush_gart();
-
- return map;
-}
-
/* Map a single area into the IOMMU */
static dma_addr_t
gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
@@ -276,12 +259,13 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
unsigned long bus;
if (!dev)
- dev = &fallback_dev;
+ dev = &x86_dma_fallback_dev;
if (!need_iommu(dev, paddr, size))
return paddr;
- bus = gart_map_simple(dev, paddr, size, dir);
+ bus = dma_map_area(dev, paddr, size, dir, 0);
+ flush_gart();
return bus;
}
@@ -340,7 +324,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
unsigned long addr = sg_phys(s);
if (nonforced_iommu(dev, addr, s->length)) {
- addr = dma_map_area(dev, addr, s->length, dir);
+ addr = dma_map_area(dev, addr, s->length, dir, 0);
if (addr == bad_dma_address) {
if (i > 0)
gart_unmap_sg(dev, sg, i, dir);
@@ -362,7 +346,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
int nelems, struct scatterlist *sout,
unsigned long pages)
{
- unsigned long iommu_start = alloc_iommu(dev, pages);
+ unsigned long iommu_start = alloc_iommu(dev, pages, 0);
unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
@@ -427,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
return 0;
if (!dev)
- dev = &fallback_dev;
+ dev = &x86_dma_fallback_dev;
out = 0;
start = 0;
@@ -499,6 +483,46 @@ error:
return 0;
}
+/* allocate and map a coherent mapping */
+static void *
+gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
+ gfp_t flag)
+{
+ dma_addr_t paddr;
+ unsigned long align_mask;
+ struct page *page;
+
+ if (force_iommu && !(flag & GFP_DMA)) {
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ page = alloc_pages(flag | __GFP_ZERO, get_order(size));
+ if (!page)
+ return NULL;
+
+ align_mask = (1UL << get_order(size)) - 1;
+ paddr = dma_map_area(dev, page_to_phys(page), size,
+ DMA_BIDIRECTIONAL, align_mask);
+
+ flush_gart();
+ if (paddr != bad_dma_address) {
+ *dma_addr = paddr;
+ return page_address(page);
+ }
+ __free_pages(page, get_order(size));
+ } else
+ return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+
+ return NULL;
+}
+
+/* free a coherent mapping */
+static void
+gart_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr)
+{
+ gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
static int no_agp;
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -626,7 +650,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
struct pci_dev *dev;
void *gatt;
int i, error;
- unsigned long start_pfn, end_pfn;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
@@ -650,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
info->aper_size = aper_size >> 20;
gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+ gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(gatt_size));
if (!gatt)
panic("Cannot allocate GATT table");
if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
panic("Could not set GART PTEs to uncacheable pages");
- memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
enable_gart_translations();
@@ -665,19 +688,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
if (!error)
error = sysdev_register(&device_gart);
if (error)
- panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+ panic("Could not register gart_sysdev -- "
+ "would corrupt data on next suspend");
flush_gart();
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
aper_base, aper_size>>10);
- /* need to map that range */
- end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
- if (end_pfn > max_low_pfn_mapped) {
- start_pfn = (aper_base>>PAGE_SHIFT);
- init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
- }
return 0;
nommu:
@@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
return -1;
}
-extern int agp_amd64_init(void);
-
static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
- .map_simple = gart_map_simple,
.unmap_single = gart_unmap_single,
- .sync_single_for_cpu = NULL,
- .sync_single_for_device = NULL,
- .sync_single_range_for_cpu = NULL,
- .sync_single_range_for_device = NULL,
- .sync_sg_for_cpu = NULL,
- .sync_sg_for_device = NULL,
.map_sg = gart_map_sg,
.unmap_sg = gart_unmap_sg,
+ .alloc_coherent = gart_alloc_coherent,
+ .free_coherent = gart_free_coherent,
};
void gart_iommu_shutdown(void)
@@ -727,7 +738,8 @@ void __init gart_iommu_init(void)
{
struct agp_kern_info info;
unsigned long iommu_start;
- unsigned long aper_size;
+ unsigned long aper_base, aper_size;
+ unsigned long start_pfn, end_pfn;
unsigned long scratch;
long i;
@@ -759,30 +771,35 @@ void __init gart_iommu_init(void)
(no_agp && init_k8_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
printk(KERN_WARNING "More than 4GB of memory "
- "but GART IOMMU not available.\n"
- KERN_WARNING "falling back to iommu=soft.\n");
+ "but GART IOMMU not available.\n");
+ printk(KERN_WARNING "falling back to iommu=soft.\n");
}
return;
}
+ /* need to map that range */
+ aper_size = info.aper_size << 20;
+ aper_base = info.aper_base;
+ end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+ if (end_pfn > max_low_pfn_mapped) {
+ start_pfn = (aper_base>>PAGE_SHIFT);
+ init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ }
+
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
- aper_size = info.aper_size * 1024 * 1024;
iommu_size = check_iommu_size(info.aper_base, aper_size);
iommu_pages = iommu_size >> PAGE_SHIFT;
- iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+ iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(iommu_pages/8));
if (!iommu_gart_bitmap)
panic("Cannot allocate iommu bitmap\n");
- memset(iommu_gart_bitmap, 0, iommu_pages/8);
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+ iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
- else
+ if (!iommu_leak_tab)
printk(KERN_DEBUG
"PCI-DMA: Cannot allocate leak trace area\n");
}
@@ -792,7 +809,7 @@ void __init gart_iommu_init(void)
* Out of IOMMU space handling.
* Reserve some invalid pages at the beginning of the GART.
*/
- set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+ iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
agp_memory_reserved = iommu_size;
printk(KERN_INFO
@@ -850,7 +867,8 @@ void __init gart_parse_options(char *p)
if (!strncmp(p, "leak", 4)) {
leak_trace = 1;
p += 4;
- if (*p == '=') ++p;
+ if (*p == '=')
+ ++p;
if (isdigit(*p) && get_option(&p, &arg))
iommu_leak_pages = arg;
}
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3f91f71cdc3..c70ab5a5d4c 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
- if (hwdev && bus + size > *hwdev->dma_mask) {
+ if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
if (*hwdev->dma_mask >= DMA_32BIT_MASK)
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}
+static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
struct dma_mapping_ops nommu_dma_ops = {
+ .alloc_coherent = dma_generic_alloc_coherent,
+ .free_coherent = nommu_free_coherent,
.map_single = nommu_map_single,
.map_sg = nommu_map_sg,
.is_phys = 1,
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index bc1f2d3ea27..a311ffcaad1 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -1,20 +1,13 @@
#include <linux/platform_device.h>
-#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/init.h>
static __init int add_pcspkr(void)
{
struct platform_device *pd;
- int ret;
- pd = platform_device_alloc("pcspkr", -1);
- if (!pd)
- return -ENOMEM;
+ pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
- ret = platform_device_add(pd);
- if (ret)
- platform_device_put(pd);
-
- return ret;
+ return IS_ERR(pd) ? PTR_ERR(pd) : 0;
}
device_initcall(add_pcspkr);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a..ec7a2ba9bce 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -185,7 +185,8 @@ static void mwait_idle(void)
static void poll_idle(void)
{
local_irq_enable();
- cpu_relax();
+ while (!need_resched())
+ cpu_relax();
}
/*
@@ -246,6 +247,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
return 1;
}
+static cpumask_t c1e_mask = CPU_MASK_NONE;
+static int c1e_detected;
+
+void c1e_remove_cpu(int cpu)
+{
+ cpu_clear(cpu, c1e_mask);
+}
+
/*
* C1E aware idle routine. We check for C1E active in the interrupt
* pending message MSR. If we detect C1E, then we handle it the same
@@ -253,9 +262,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
*/
static void c1e_idle(void)
{
- static cpumask_t c1e_mask = CPU_MASK_NONE;
- static int c1e_detected;
-
if (need_resched())
return;
@@ -265,8 +271,10 @@ static void c1e_idle(void)
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
c1e_detected = 1;
- mark_tsc_unstable("TSC halt in C1E");
- printk(KERN_INFO "System has C1E enabled\n");
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ mark_tsc_unstable("TSC halt in AMD C1E");
+ printk(KERN_INFO "System has AMD C1E enabled\n");
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
}
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7b6e44a7c62..205188db962 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
+#include <linux/dmi.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -55,6 +56,7 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>
+#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/smp.h>
@@ -90,6 +92,7 @@ static void cpu_exit_clear(void)
cpu_clear(cpu, cpu_callin_map);
numa_remove_cpu(cpu);
+ c1e_remove_cpu(cpu);
}
/* We don't actually take CPU down, just spin without interrupts. */
@@ -97,7 +100,6 @@ static inline void play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
- wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
@@ -106,8 +108,8 @@ static inline void play_dead(void)
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
@@ -162,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
+ const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
@@ -174,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all)
}
printk("\n");
- printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+
+ board = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!board)
+ board = "";
+ printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
task_pid_nr(current), current->comm,
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
+ init_utsname()->version, board);
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
@@ -278,6 +285,14 @@ void exit_thread(void)
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+#ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(current->thread.ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(current->thread.ds_ctx);
+ }
+#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -439,6 +454,35 @@ int set_tsc_mode(unsigned int val)
return 0;
}
+#ifdef CONFIG_X86_DS
+static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+{
+ unsigned long ds_prev = 0;
+ unsigned long ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
+ }
+ return debugctl;
+}
+#else
+static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+{
+ return debugctl;
+}
+#endif /* CONFIG_X86_DS */
+
static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
@@ -449,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
prev = &prev_p->thread;
next = &next_p->thread;
- debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
- }
+ debugctl = update_debugctl(prev, next, prev->debugctlmsr);
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -480,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
hard_enable_TSC();
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 87d7dfdcf46..2a8ccb9238b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,11 +37,11 @@
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
-#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
@@ -89,19 +89,20 @@ void exit_idle(void)
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
-#include <asm/nmi.h>
+#include <linux/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
idle_task_exit();
- wbinvd();
+ c1e_remove_cpu(raw_smp_processor_id());
+
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
@@ -153,7 +154,7 @@ void cpu_idle(void)
}
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -162,59 +163,61 @@ void __show_regs(struct pt_regs * regs)
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
- regs->flags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
+ regs->sp, regs->flags);
+ printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
regs->dx, regs->si, regs->di);
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+ printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
regs->bp, regs->r8, regs->r9);
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
+ printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+ regs->r10, regs->r11, regs->r12);
+ printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+ regs->r13, regs->r14, regs->r15);
+
+ asm("movl %%ds,%0" : "=r" (ds));
+ asm("movl %%cs,%0" : "=r" (cs));
+ asm("movl %%es,%0" : "=r" (es));
asm("movl %%fs,%0" : "=r" (fsindex));
asm("movl %%gs,%0" : "=r" (gsindex));
rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrl(MSR_GS_BASE, gs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
- printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
- printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+ printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ fs, fsindex, gs, gsindex, shadowgs);
+ printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+ es, cr0);
+ printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+ cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- printk("CPU %d:", smp_processor_id());
+ printk(KERN_INFO "CPU %d:", smp_processor_id());
__show_regs(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -240,6 +243,14 @@ void exit_thread(void)
t->io_bitmap_max = 0;
put_cpu();
}
+#ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(t->ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(t->ds_ctx);
+ }
+#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -315,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk)
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
+ struct task_struct *p, struct pt_regs *regs)
{
int err;
- struct pt_regs * childregs;
+ struct pt_regs *childregs;
struct task_struct *me = current;
childregs = ((struct pt_regs *)
@@ -363,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
if (test_thread_flag(TIF_IA32))
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
- else
-#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
+ else
+#endif
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+ if (err)
goto out;
}
err = 0;
@@ -473,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
next = &next_p->thread;
debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+#ifdef CONFIG_X86_DS
+ {
+ unsigned long ds_prev = 0, ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /*
+ * We clear debugctl to make sure DS
+ * is not in use when we change it:
+ */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsrl(MSR_IA32_DS_AREA, ds_next);
+ }
}
+#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
}
/*
@@ -545,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
unsigned fsindex, gsindex;
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter>5)
+ if (next_p->fpu_counter > 5)
prefetch(next->xstate);
/*
@@ -553,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
load_sp0(tss, next);
- /*
+ /*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
- loadsegment(es, next->es);
+ loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
@@ -585,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_leave_lazy_cpu_mode();
- /*
+ /*
* Switch FS and GS.
*
* Segment register != 0 always requires a reload. Also
@@ -594,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
- /*
+ /*
* Check if the user used a selector != 0; if yes
* clear 64bit base, since overloaded base is always
* mapped to the Null selector
*/
if (fsindex)
- prev->fs = 0;
+ prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
@@ -610,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (gsindex)
- prev->gs = 0;
+ prev->gs = 0;
}
if (next->gs)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -619,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Must be after DS reload */
unlazy_fpu(prev_p);
- /*
+ /*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ write_pda(pcurrent, next_p);
write_pda(kernelstack,
(unsigned long)task_stack_page(next_p) +
@@ -665,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
char __user * __user *envp, struct pt_regs *regs)
{
long error;
- char * filename;
+ char *filename;
filename = getname(name);
error = PTR_ERR(filename);
@@ -723,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp,ip;
+ u64 fp, ip;
int count = 0;
- if (!p || p == current || p->state==TASK_RUNNING)
- return 0;
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
stack = (unsigned long)task_stack_page(p);
if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.sp);
- do {
+ do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
+ return 0;
ip = *(u64 *)(fp+8);
if (!in_sched_functions(ip))
return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
+ fp = *(u64 *)fp;
+ } while (count++ < 16);
return 0;
}
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
- int ret = 0;
+{
+ int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (code) {
case ARCH_SET_GS:
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, GS_TLS, addr);
+ if (doit) {
load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
+ load_gs_index(GS_TLS_SEL);
}
- task->thread.gsindex = GS_TLS_SEL;
+ task->thread.gsindex = GS_TLS_SEL;
task->thread.gs = 0;
- } else {
+ } else {
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
load_gs_index(0);
ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
- }
+ }
}
put_cpu();
break;
@@ -825,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gs;
- }
- else
+ } else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fc3e8dcd9da..e375b658efc 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -14,6 +14,7 @@
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
+#include <linux/tracehook.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
@@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * The configuration for a particular BTS hardware implementation.
+ */
+struct bts_configuration {
+ /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
+ unsigned char sizeof_bts;
+ /* the size of a field in the BTS record in bytes */
+ unsigned char sizeof_field;
+ /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
+ unsigned long debugctl_mask;
+};
+static struct bts_configuration bts_cfg;
+
+#define BTS_MAX_RECORD_SIZE (8 * 3)
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ */
+
+enum bts_field {
+ bts_from = 0,
+ bts_to,
+ bts_flags,
+
+ bts_escape = (unsigned long)-1,
+ bts_qual = bts_to,
+ bts_jiffies = bts_flags
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
+{
+ base += (bts_cfg.sizeof_field * field);
+ return *(unsigned long *)base;
+}
-static int ptrace_bts_get_size(struct task_struct *child)
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ base += (bts_cfg.sizeof_field * field);;
+ (*(unsigned long *)base) = val;
+}
- return ds_get_bts_index((void *)child->thread.ds_area_msr);
+/*
+ * Translate a BTS record from the raw format into the bts_struct format
+ *
+ * out (out): bts_struct interpretation
+ * raw: raw BTS record
+ */
+static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
+{
+ memset(out, 0, sizeof(*out));
+ if (bts_get(raw, bts_from) == bts_escape) {
+ out->qualifier = bts_get(raw, bts_qual);
+ out->variant.jiffies = bts_get(raw, bts_jiffies);
+ } else {
+ out->qualifier = BTS_BRANCH;
+ out->variant.lbr.from_ip = bts_get(raw, bts_from);
+ out->variant.lbr.to_ip = bts_get(raw, bts_to);
+ }
}
-static int ptrace_bts_read_record(struct task_struct *child,
- long index,
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out)
{
struct bts_struct ret;
- int retval;
- int bts_end;
- int bts_index;
-
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ const void *bts_record;
+ size_t bts_index, bts_end;
+ int error;
- if (index < 0)
- return -EINVAL;
+ error = ds_get_bts_end(child, &bts_end);
+ if (error < 0)
+ return error;
- bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
if (bts_end <= index)
return -EINVAL;
+ error = ds_get_bts_index(child, &bts_index);
+ if (error < 0)
+ return error;
+
/* translate the ptrace bts index into the ds bts index */
- bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
- bts_index -= (index + 1);
- if (bts_index < 0)
- bts_index += bts_end;
+ bts_index += bts_end - (index + 1);
+ if (bts_end <= bts_index)
+ bts_index -= bts_end;
- retval = ds_read_bts((void *)child->thread.ds_area_msr,
- bts_index, &ret);
- if (retval < 0)
- return retval;
+ error = ds_access_bts(child, bts_index, &bts_record);
+ if (error < 0)
+ return error;
+
+ ptrace_bts_translate_record(&ret, bts_record);
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
@@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
return sizeof(ret);
}
-static int ptrace_bts_clear(struct task_struct *child)
-{
- if (!child->thread.ds_area_msr)
- return -ENXIO;
-
- return ds_clear((void *)child->thread.ds_area_msr);
-}
-
static int ptrace_bts_drain(struct task_struct *child,
long size,
struct bts_struct __user *out)
{
- int end, i;
- void *ds = (void *)child->thread.ds_area_msr;
-
- if (!ds)
- return -ENXIO;
+ struct bts_struct ret;
+ const unsigned char *raw;
+ size_t end, i;
+ int error;
- end = ds_get_bts_index(ds);
- if (end <= 0)
- return end;
+ error = ds_get_bts_index(child, &end);
+ if (error < 0)
+ return error;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
- for (i = 0; i < end; i++, out++) {
- struct bts_struct ret;
- int retval;
+ error = ds_access_bts(child, 0, (const void **)&raw);
+ if (error < 0)
+ return error;
- retval = ds_read_bts(ds, i, &ret);
- if (retval < 0)
- return retval;
+ for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
+ ptrace_bts_translate_record(&ret, raw);
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
}
- ds_clear(ds);
+ error = ds_clear_bts(child);
+ if (error < 0)
+ return error;
return end;
}
+static void ptrace_bts_ovfl(struct task_struct *child)
+{
+ send_sig(child->thread.bts_ovfl_signal, child, 0);
+}
+
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
{
struct ptrace_bts_config cfg;
- int bts_size, ret = 0;
- void *ds;
+ int error = 0;
+
+ error = -EOPNOTSUPP;
+ if (!bts_cfg.sizeof_bts)
+ goto errout;
+ error = -EIO;
if (cfg_size < sizeof(cfg))
- return -EIO;
+ goto errout;
+ error = -EFAULT;
if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
- return -EFAULT;
+ goto errout;
- if ((int)cfg.size < 0)
- return -EINVAL;
+ error = -EINVAL;
+ if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
+ !(cfg.flags & PTRACE_BTS_O_ALLOC))
+ goto errout;
- bts_size = 0;
- ds = (void *)child->thread.ds_area_msr;
- if (ds) {
- bts_size = ds_get_bts_size(ds);
- if (bts_size < 0)
- return bts_size;
- }
- cfg.size = PAGE_ALIGN(cfg.size);
+ if (cfg.flags & PTRACE_BTS_O_ALLOC) {
+ ds_ovfl_callback_t ovfl = NULL;
+ unsigned int sig = 0;
+
+ /* we ignore the error in case we were not tracing child */
+ (void)ds_release_bts(child);
- if (bts_size != cfg.size) {
- ret = ptrace_bts_realloc(child, cfg.size,
- cfg.flags & PTRACE_BTS_O_CUT_SIZE);
- if (ret < 0)
+ if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+ if (!cfg.signal)
+ goto errout;
+
+ sig = cfg.signal;
+ ovfl = ptrace_bts_ovfl;
+ }
+
+ error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
+ if (error < 0)
goto errout;
- ds = (void *)child->thread.ds_area_msr;
+ child->thread.bts_ovfl_signal = sig;
}
- if (cfg.flags & PTRACE_BTS_O_SIGNAL)
- ret = ds_set_overflow(ds, DS_O_SIGNAL);
- else
- ret = ds_set_overflow(ds, DS_O_WRAP);
- if (ret < 0)
+ error = -EINVAL;
+ if (!child->thread.ds_ctx && cfg.flags)
goto errout;
if (cfg.flags & PTRACE_BTS_O_TRACE)
- child->thread.debugctlmsr |= ds_debugctl_mask();
+ child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
else
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
if (cfg.flags & PTRACE_BTS_O_SCHED)
set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
else
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
- ret = sizeof(cfg);
+ error = sizeof(cfg);
out:
if (child->thread.debugctlmsr)
@@ -702,10 +778,10 @@ out:
else
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- return ret;
+ return error;
errout:
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
goto out;
}
@@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size,
struct ptrace_bts_config __user *ucfg)
{
- void *ds = (void *)child->thread.ds_area_msr;
struct ptrace_bts_config cfg;
+ size_t end;
+ const void *base, *max;
+ int error;
if (cfg_size < sizeof(cfg))
return -EIO;
- memset(&cfg, 0, sizeof(cfg));
+ error = ds_get_bts_end(child, &end);
+ if (error < 0)
+ return error;
- if (ds) {
- cfg.size = ds_get_bts_size(ds);
+ error = ds_access_bts(child, /* index = */ 0, &base);
+ if (error < 0)
+ return error;
- if (ds_get_overflow(ds) == DS_O_SIGNAL)
- cfg.flags |= PTRACE_BTS_O_SIGNAL;
+ error = ds_access_bts(child, /* index = */ end, &max);
+ if (error < 0)
+ return error;
- if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
- child->thread.debugctlmsr & ds_debugctl_mask())
- cfg.flags |= PTRACE_BTS_O_TRACE;
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.size = (max - base);
+ cfg.signal = child->thread.bts_ovfl_signal;
+ cfg.bts_size = sizeof(struct bts_struct);
- if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
- cfg.flags |= PTRACE_BTS_O_SCHED;
- }
+ if (cfg.signal)
+ cfg.flags |= PTRACE_BTS_O_SIGNAL;
- cfg.bts_size = sizeof(struct bts_struct);
+ if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+ child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+ cfg.flags |= PTRACE_BTS_O_TRACE;
+
+ if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+ cfg.flags |= PTRACE_BTS_O_SCHED;
if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
return -EFAULT;
@@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child,
return sizeof(cfg);
}
-
static int ptrace_bts_write_record(struct task_struct *child,
const struct bts_struct *in)
{
- int retval;
+ unsigned char bts_record[BTS_MAX_RECORD_SIZE];
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
- retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
- if (retval)
- return retval;
+ memset(bts_record, 0, bts_cfg.sizeof_bts);
+ switch (in->qualifier) {
+ case BTS_INVALID:
+ break;
- return sizeof(*in);
-}
+ case BTS_BRANCH:
+ bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
+ bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
+ break;
-static int ptrace_bts_realloc(struct task_struct *child,
- int size, int reduce_size)
-{
- unsigned long rlim, vm;
- int ret, old_size;
+ case BTS_TASK_ARRIVES:
+ case BTS_TASK_DEPARTS:
+ bts_set(bts_record, bts_from, bts_escape);
+ bts_set(bts_record, bts_qual, in->qualifier);
+ bts_set(bts_record, bts_jiffies, in->variant.jiffies);
+ break;
- if (size < 0)
+ default:
return -EINVAL;
-
- old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
- if (old_size < 0)
- return old_size;
-
- ret = ds_free((void **)&child->thread.ds_area_msr);
- if (ret < 0)
- goto out;
-
- size >>= PAGE_SHIFT;
- old_size >>= PAGE_SHIFT;
-
- current->mm->total_vm -= old_size;
- current->mm->locked_vm -= old_size;
-
- if (size == 0)
- goto out;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->total_vm;
- if (size <= 0)
- goto out;
- }
-
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->locked_vm;
- if (size <= 0)
- goto out;
}
- ret = ds_allocate((void **)&child->thread.ds_area_msr,
- size << PAGE_SHIFT);
- if (ret < 0)
- goto out;
-
- current->mm->total_vm += size;
- current->mm->locked_vm += size;
-
-out:
- if (child->thread.ds_area_msr)
- set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
- else
- clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
- return ret;
+ /* The writing task will be the switched-to task on a context
+ * switch. It needs to write into the switched-from task's BTS
+ * buffer. */
+ return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
ptrace_bts_write_record(tsk, &rec);
}
-#endif /* X86_BTS */
+
+static const struct bts_configuration bts_cfg_netburst = {
+ .sizeof_bts = sizeof(long) * 3,
+ .sizeof_field = sizeof(long),
+ .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
+};
+
+static const struct bts_configuration bts_cfg_pentium_m = {
+ .sizeof_bts = sizeof(long) * 3,
+ .sizeof_field = sizeof(long),
+ .debugctl_mask = (1<<6)|(1<<7)
+};
+
+static const struct bts_configuration bts_cfg_core2 = {
+ .sizeof_bts = 8 * 3,
+ .sizeof_field = 8,
+ .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void bts_configure(const struct bts_configuration *cfg)
+{
+ bts_cfg = *cfg;
+}
+
+void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 0x6:
+ switch (c->x86_model) {
+ case 0xD:
+ case 0xE: /* Pentium M */
+ bts_configure(&bts_cfg_pentium_m);
+ break;
+ case 0xF: /* Core2 */
+ case 0x1C: /* Atom */
+ bts_configure(&bts_cfg_core2);
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ case 0xF:
+ switch (c->x86_model) {
+ case 0x0:
+ case 0x1:
+ case 0x2: /* Netburst */
+ bts_configure(&bts_cfg_netburst);
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
/*
* Called by kernel/ptrace.c when detaching..
@@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
- if (child->thread.ds_area_msr) {
-#ifdef X86_BTS
- ptrace_bts_realloc(child, 0, 0);
-#endif
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
- }
+#ifdef CONFIG_X86_PTRACE_BTS
+ (void)ds_release_bts(child);
+
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+#endif /* CONFIG_X86_PTRACE_BTS */
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
/*
* These bits need more cooking - not enabled yet:
*/
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
case PTRACE_BTS_CONFIG:
ret = ptrace_bts_config
(child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_SIZE:
- ret = ptrace_bts_get_size(child);
+ ret = ds_get_bts_index(child, /* pos = */ NULL);
break;
case PTRACE_BTS_GET:
@@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
- ret = ptrace_bts_clear(child);
+ ret = ds_clear_bts(child);
break;
case PTRACE_BTS_DRAIN:
ret = ptrace_bts_drain
(child, data, (struct bts_struct __user *) addr);
break;
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
default:
ret = ptrace_request(child, request, addr, data);
@@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
force_sig_info(SIGTRAP, &info, tsk);
}
-static void syscall_trace(struct pt_regs *regs)
-{
- if (!(current->ptrace & PT_PTRACED))
- return;
-
-#if 0
- printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
- current->comm,
- regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
- current_thread_info()->flags, current->ptrace);
-#endif
-
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
-}
#ifdef CONFIG_X86_32
# define IS_IA32 1
@@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
ret = -1L;
- if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
- syscall_trace(regs);
+ if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+ tracehook_report_syscall_entry(regs))
+ ret = -1L;
if (unlikely(current->audit_context)) {
if (IS_IA32)
@@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
if (test_thread_flag(TIF_SYSCALL_TRACE))
- syscall_trace(regs);
+ tracehook_report_syscall_exit(regs, 0);
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
* system call instruction.
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
- (current->ptrace & PT_PTRACED))
+ tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
send_sigtrap(current, regs, 0);
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc63cb..f4c93f1cfc1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+/*
+ * Keyboard reset and triple fault may result in INIT, not RESET, which
+ * doesn't work when we're in vmx root mode. Try ACPI first.
+ */
+enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 703310a9902..6f50664b2ba 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,10 +20,11 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PAE_PGD_ATTR (_PAGE_PRESENT)
-/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are
- * used to save some data for jumping back
+/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
*/
-#define DATA(offset) (PAGE_SIZE/2+(offset))
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
/* Minimal CPU state */
#define ESP DATA(0x0)
@@ -376,3 +377,6 @@ swap_pages:
popl %ebx
popl %ebp
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 59f07e14d08..46c98efbbf8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -223,6 +223,9 @@ unsigned long saved_video_mode;
#define RAMDISK_LOAD_FLAG 0x4000
static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
@@ -445,7 +448,7 @@ static void __init reserve_early_setup_data(void)
* @size: Size of the crashkernel memory to reserve.
* Returns the base address on success, and -1ULL on failure.
*/
-unsigned long long find_and_reserve_crashkernel(unsigned long long size)
+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
{
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long start = 0LL;
@@ -604,14 +607,6 @@ void __init setup_arch(char **cmdline_p)
early_cpu_init();
early_ioremap_init();
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be before kernel pagetables are setup
- * or fixmap area is touched.
- */
- vmi_init();
-#endif
-
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
edid_info = boot_params.edid_info;
@@ -673,11 +668,36 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+ if (builtin_cmdline[0]) {
+ /* append boot loader cmdline to builtin */
+ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+ }
+#endif
+#endif
+
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
parse_early_param();
+#ifdef CONFIG_X86_64
+ check_efer();
+#endif
+
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+ /*
+ * Must be before kernel pagetables are setup
+ * or fixmap area is touched.
+ */
+ vmi_init();
+#endif
+
/* after early param, so could get panic from serial */
reserve_early_setup_data();
@@ -738,7 +758,6 @@ void __init setup_arch(char **cmdline_p)
#else
num_physpages = max_pfn;
- check_efer();
if (cpu_has_x2apic)
check_x2apic();
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb519d2d..8b4956e800a 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -24,4 +24,9 @@ struct rt_sigframe {
struct ucontext uc;
struct siginfo info;
};
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs *regs);
#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0c727f64e79..2a2435d3037 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/tracehook.h>
#include <linux/elf.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -559,8 +560,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -569,6 +568,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+
return 0;
}
@@ -662,5 +664,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+
clear_thread_flag(TIF_IRET);
}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 2f146405005..694aa888bb1 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -15,17 +15,20 @@
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
+#include <linux/tracehook.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compiler.h>
+#include <linux/uaccess.h>
+
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
+#include <asm/syscall.h>
#include <asm/syscalls.h>
#include "sigframe.h"
@@ -42,11 +45,6 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs);
-
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
@@ -129,7 +127,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
+#define COPY(x) (err |= __get_user(regs->x, &sc->x))
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
@@ -159,7 +157,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
}
{
- struct _fpstate __user * buf;
+ struct _fpstate __user *buf;
err |= __get_user(buf, &sc->fpstate);
if (buf) {
@@ -199,7 +197,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
+
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
@@ -209,16 +207,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
- signal_fault(regs,frame,"sigreturn");
+ signal_fault(regs, frame, "sigreturn");
return 0;
-}
+}
/*
* Set up a signal frame.
*/
static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+ unsigned long mask, struct task_struct *me)
{
int err = 0;
@@ -274,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+ sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
- struct _fpstate __user *fp = NULL;
+ struct _fpstate __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
- fp = get_stack(ka, regs, sizeof(struct _fpstate));
+ fp = get_stack(ka, regs, sizeof(struct _fpstate));
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
goto give_sigsegv;
- if (save_i387(fp) < 0)
- err |= -1;
+ if (save_i387(fp) < 0)
+ err |= -1;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
- if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
}
-
+
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
@@ -312,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
+ if (sizeof(*set) == 16) {
__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+ __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
} else
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -325,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
- goto give_sigsegv;
+ goto give_sigsegv;
}
if (err)
@@ -333,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up registers for signal handler */
regs->di = sig;
- /* In case the signal handler was declared without prototypes */
+ /* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
@@ -356,37 +355,8 @@ give_sigsegv:
}
/*
- * Return -1L or the syscall number that @regs is executing.
- */
-static long current_syscall(struct pt_regs *regs)
-{
- /*
- * We always sign-extend a -1 value being set here,
- * so this is always either -1L or a syscall number.
- */
- return regs->orig_ax;
-}
-
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error. This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- /*
- * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
- * and will match correctly in comparisons.
- */
- return (int) regs->ax;
-#endif
- return regs->ax;
-}
-
-/*
* OK, we're invoking a handler
- */
+ */
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -395,9 +365,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
- if (current_syscall(regs) >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
- switch (current_syscall_ret(regs)) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -430,7 +400,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = ia32_setup_frame(sig, ka, oldset, regs);
- } else
+ } else
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
@@ -454,15 +424,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
+ sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
+
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
return ret;
@@ -519,9 +490,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (current_syscall(regs) >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
- switch (current_syscall_ret(regs)) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -559,17 +530,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
- struct task_struct *me = current;
+{
+ struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
- me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+ me->comm, me->pid, where, frame, regs->ip,
+ regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
- force_sig(SIGSEGV, me);
-}
+ force_sig(SIGSEGV, me);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0133a952d11..2ff0bbcd5bd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -749,6 +749,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
}
#ifdef CONFIG_X86_64
+
+/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
+static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
+{
+ if (!after_bootmem)
+ free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
+}
+
/*
* Allocate node local memory for the AP pda.
*
@@ -777,8 +785,7 @@ int __cpuinit get_local_pda(int cpu)
if (oldpda) {
memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
+ free_bootmem_pda(oldpda);
}
newpda->in_bootmem = 0;
@@ -987,17 +994,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
flush_tlb_all();
low_mappings = 1;
-#ifdef CONFIG_X86_PC
- if (def_to_bigsmp && apicid > 8) {
- printk(KERN_WARNING
- "More than 8 CPUs detected - skipping them.\n"
- "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
- err = -1;
- } else
- err = do_boot_cpu(apicid, cpu);
-#else
err = do_boot_cpu(apicid, cpu);
-#endif
zap_low_mappings();
low_mappings = 0;
@@ -1051,6 +1048,34 @@ static __init void disable_smp(void)
static int __init smp_sanity_check(unsigned max_cpus)
{
preempt_disable();
+
+#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+ if (def_to_bigsmp && nr_cpu_ids > 8) {
+ unsigned int cpu;
+ unsigned nr;
+
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+
+ nr = 0;
+ for_each_present_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_present_map);
+ nr++;
+ }
+
+ nr = 0;
+ for_each_possible_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_possible_map);
+ nr++;
+ }
+
+ nr_cpu_ids = 8;
+ }
+#endif
+
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
"by the BIOS.\n", hard_smp_processor_id());
@@ -1196,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
setup_boot_clock();
+
+ if (is_uv_system())
+ uv_system_init();
out:
preempt_enable();
}
@@ -1285,16 +1313,13 @@ __init void prefill_possible_map(void)
if (!num_processors)
num_processors = 1;
-#ifdef CONFIG_HOTPLUG_CPU
if (additional_cpus == -1) {
if (disabled_cpus > 0)
additional_cpus = disabled_cpus;
else
additional_cpus = 0;
}
-#else
- additional_cpus = 0;
-#endif
+
possible = num_processors + additional_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
@@ -1386,17 +1411,3 @@ void __cpu_die(unsigned int cpu)
BUG();
}
#endif
-
-/*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
-static int __init parse_maxcpus(char *arg)
-{
- extern unsigned int maxcpus;
-
- if (arg)
- maxcpus = simple_strtoul(arg, NULL, 0);
- return 0;
-}
-early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 99941b37eca..397e309839d 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -8,18 +8,21 @@
DEFINE_PER_CPU(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-/* Initialize the CPU's GDT. This is either the boot CPU doing itself
- (still using the master per-cpu area), or a CPU doing it for a
- secondary which will soon come up. */
+/*
+ * Initialize the CPU's GDT. This is either the boot CPU doing itself
+ * (still using the master per-cpu area), or a CPU doing it for a
+ * secondary which will soon come up.
+ */
__cpuinit void init_gdt(int cpu)
{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+ struct desc_struct gdt;
- pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
- __per_cpu_offset[cpu], 0xFFFFF,
+ pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
+ gdt.s = 1;
- gdt[GDT_ENTRY_PERCPU].s = 1;
+ write_gdt_entry(get_cpu_gdt_table(cpu),
+ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index c9288c883e2..6bc211accf0 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -13,16 +13,17 @@
#include <linux/utsname.h>
#include <linux/personality.h>
#include <linux/random.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off)
{
long error;
- struct file * file;
+ struct file *file;
error = -EINVAL;
if (off & ~PAGE_MASK)
@@ -57,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
unmapped base down for this case. This can give
conflicts with the heap, but we assume that glibc
malloc knows how to fall back to mmap. Give it 1GB
- of playground for now. -AK */
- *begin = 0x40000000;
- *end = 0x80000000;
+ of playground for now. -AK */
+ *begin = 0x40000000;
+ *end = 0x80000000;
if (current->flags & PF_RANDOMIZE) {
new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
if (new_begin)
@@ -67,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
}
} else {
*begin = TASK_UNMAPPED_BASE;
- *end = TASK_SIZE;
+ *end = TASK_SIZE;
}
-}
+}
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -79,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_area_struct *vma;
unsigned long start_addr;
unsigned long begin, end;
-
+
if (flags & MAP_FIXED)
return addr;
- find_start_end(flags, &begin, &end);
+ find_start_end(flags, &begin, &end);
if (len > end)
return -ENOMEM;
@@ -97,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
&& len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
+ mm->cached_hole_size = 0;
mm->free_area_cache = begin;
}
addr = mm->free_area_cache;
- if (addr < begin)
- addr = begin;
+ if (addr < begin)
+ addr = begin;
start_addr = addr;
full_search:
@@ -128,7 +129,7 @@ full_search:
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
+ mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
}
@@ -178,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr-len);
if (!vma || addr <= vma->vm_start)
/* remember the address as a hint for next time */
- return (mm->free_area_cache = addr-len);
+ return mm->free_area_cache = addr-len;
}
if (mm->mmap_base < len)
@@ -195,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr);
if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
- return (mm->free_area_cache = addr);
+ return mm->free_area_cache = addr;
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
@@ -225,13 +226,13 @@ bottomup:
}
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof(*name));
up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
return err ? -EFAULT : 0;
}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index d0fbb7712ab..8b8c0d6640f 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -17,6 +17,7 @@
#include <asm/genapic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
+#include <asm/irq_vectors.h>
#include <mach_apic.h>
@@ -783,7 +784,7 @@ static int __init uv_bau_init(void)
uv_init_blade(blade, node, cur_cpu);
cur_cpu += uv_blade_nr_possible_cpus(blade);
}
- set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
+ alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
return 0;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caaca711..7a31f104bef 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -32,6 +32,8 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
@@ -45,9 +47,6 @@
#include <asm/unwind.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
void printk_address(unsigned long address, int reliable)
{
- printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+ printk(" [<%016lx>] %s%pS\n",
+ address, reliable ? "" : "? ", (void *) address);
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
[STACKFAULT_STACK - 1] = "#SS",
[MCE_STACK - 1] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ [N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
};
unsigned k;
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
}
/*
- * x86-64 can have up to three kernel stacks:
+ * x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
struct thread_info *tinfo;
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!bp) {
if (task == current) {
/* Grab bp right from our regs */
- asm("movq %%rbp, %0" : "=r" (bp) :);
+ asm("movq %%rbp, %0" : "=r" (bp) : );
} else {
/* bp is the last reg pushed by switch_to */
bp = *(unsigned long *) task->thread.sp;
@@ -339,9 +340,8 @@ static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl)
{
- printk("\nCall Trace:\n");
+ printk("Call Trace:\n");
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
- printk("\n");
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irqstack_end =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
- // debugging aid: "show_stack(NULL, NULL);" prints the
- // back trace for this cpu.
+ /*
+ * debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu.
+ */
if (sp == NULL) {
if (task)
@@ -386,6 +390,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
+ printk("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -404,7 +409,7 @@ void dump_stack(void)
#ifdef CONFIG_FRAME_POINTER
if (!bp)
- asm("movq %%rbp, %0" : "=r" (bp):);
+ asm("movq %%rbp, %0" : "=r" (bp) : );
#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@ void dump_stack(void)
init_utsname()->version);
show_trace(NULL, NULL, &stack, bp);
}
-
EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
@@ -443,7 +447,6 @@ void show_registers(struct pt_regs *regs)
printk("Stack: ");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
regs->bp, "");
- printk("\n");
printk(KERN_EMERG "Code: ");
@@ -493,7 +496,7 @@ unsigned __kprobes long oops_begin(void)
raw_local_irq_save(flags);
cpu = smp_processor_id();
if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
+ if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
__raw_spin_lock(&die_lock);
@@ -638,7 +641,7 @@ kernel_trap:
}
#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
@@ -648,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
siginfo_t info; \
info.si_signo = signr; \
@@ -683,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
}
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
@@ -778,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
}
static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+ NOTIFY_STOP)
return;
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
@@ -882,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
else if (user_mode(eregs))
regs = task_pt_regs(current);
/* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
+ kernel process stack. */
else if (eregs->flags & X86_EFLAGS_IF)
regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
if (eregs != regs)
@@ -891,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
}
/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
unsigned long error_code)
{
struct task_struct *tsk = current;
@@ -1035,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
asmlinkage void bad_intr(void)
{
- printk("bad interrupt");
+ printk("bad interrupt");
}
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
conditional_sti(regs);
if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
+ kernel_math_error(regs, "kernel simd math error", 19))
return;
/*
@@ -1092,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
force_sig_info(SIGFPE, &info, task);
}
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
{
}
@@ -1149,8 +1153,10 @@ void __init trap_init(void)
set_intr_gate(0, &divide_error);
set_intr_gate_ist(1, &debug, DEBUG_STACK);
set_intr_gate_ist(2, &nmi, NMI_STACK);
- set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
- set_system_gate(4, &overflow); /* int4 can be called from all */
+ /* int3 can be called from all */
+ set_system_gate_ist(3, &int3, DEBUG_STACK);
+ /* int4 can be called from all */
+ set_system_gate(4, &overflow);
set_intr_gate(5, &bounds);
set_intr_gate(6, &invalid_op);
set_intr_gate(7, &device_not_available);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7603c055390..161bb850fc4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
/*
* Read TSC and the reference counters. Take care of SMI disturbance
*/
-static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *p, int hpet)
{
u64 t1, t2;
int i;
@@ -112,9 +112,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
for (i = 0; i < MAX_RETRIES; i++) {
t1 = get_cycles();
if (hpet)
- *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+ *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
else
- *pm = acpi_pm_read_early();
+ *p = acpi_pm_read_early();
t2 = get_cycles();
if ((t2 - t1) < SMI_TRESHOLD)
return t2;
@@ -122,80 +122,390 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
return ULLONG_MAX;
}
-/**
- * native_calibrate_tsc - calibrate the tsc on boot
+/*
+ * Calculate the TSC frequency from HPET reference
*/
-unsigned long native_calibrate_tsc(void)
+static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
- unsigned long flags;
- u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled();
- unsigned int tsc_khz_val = 0;
+ u64 tmp;
- local_irq_save(flags);
+ if (hpet2 < hpet1)
+ hpet2 += 0x100000000ULL;
+ hpet2 -= hpet1;
+ tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+ do_div(tmp, 1000000);
+ do_div(deltatsc, tmp);
+
+ return (unsigned long) deltatsc;
+}
+
+/*
+ * Calculate the TSC frequency from PMTimer reference
+ */
+static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
+{
+ u64 tmp;
- tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ if (!pm1 && !pm2)
+ return ULONG_MAX;
+
+ if (pm2 < pm1)
+ pm2 += (u64)ACPI_PM_OVRRUN;
+ pm2 -= pm1;
+ tmp = pm2 * 1000000000LL;
+ do_div(tmp, PMTMR_TICKS_PER_SEC);
+ do_div(deltatsc, tmp);
+
+ return (unsigned long) deltatsc;
+}
+
+#define CAL_MS 10
+#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_PIT_LOOPS 1000
+
+#define CAL2_MS 50
+#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_PIT_LOOPS 5000
+
+
+/*
+ * Try to calibrate the TSC against the Programmable
+ * Interrupt Timer and return the frequency of the TSC
+ * in kHz.
+ *
+ * Return ULONG_MAX on failure to calibrate.
+ */
+static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
+{
+ u64 tsc, t1, t2, delta;
+ unsigned long tscmin, tscmax;
+ int pitcnt;
+ /* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ /*
+ * Setup CTC channel 2* for mode 0, (interrupt on terminal
+ * count mode), binary count. Set the latch register to 50ms
+ * (LSB then MSB) to begin countdown.
+ */
outb(0xb0, 0x43);
- outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
- outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- tr1 = get_cycles();
- while ((inb(0x61) & 0x20) == 0);
- tr2 = get_cycles();
+ outb(latch & 0xff, 0x42);
+ outb(latch >> 8, 0x42);
+
+ tsc = t1 = t2 = get_cycles();
+
+ pitcnt = 0;
+ tscmax = 0;
+ tscmin = ULONG_MAX;
+ while ((inb(0x61) & 0x20) == 0) {
+ t2 = get_cycles();
+ delta = t2 - tsc;
+ tsc = t2;
+ if ((unsigned long) delta < tscmin)
+ tscmin = (unsigned int) delta;
+ if ((unsigned long) delta > tscmax)
+ tscmax = (unsigned int) delta;
+ pitcnt++;
+ }
+
+ /*
+ * Sanity checks:
+ *
+ * If we were not able to read the PIT more than loopmin
+ * times, then we have been hit by a massive SMI
+ *
+ * If the maximum is 10 times larger than the minimum,
+ * then we got hit by an SMI as well.
+ */
+ if (pitcnt < loopmin || tscmax > 10 * tscmin)
+ return ULONG_MAX;
+
+ /* Calculate the PIT value */
+ delta = t2 - t1;
+ do_div(delta, ms);
+ return delta;
+}
- tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+/*
+ * This reads the current MSB of the PIT counter, and
+ * checks if we are running on sufficiently fast and
+ * non-virtualized hardware.
+ *
+ * Our expectations are:
+ *
+ * - the PIT is running at roughly 1.19MHz
+ *
+ * - each IO is going to take about 1us on real hardware,
+ * but we allow it to be much faster (by a factor of 10) or
+ * _slightly_ slower (ie we allow up to a 2us read+counter
+ * update - anything else implies a unacceptably slow CPU
+ * or PIT for the fast calibration to work.
+ *
+ * - with 256 PIT ticks to read the value, we have 214us to
+ * see the same MSB (and overhead like doing a single TSC
+ * read per MSB value etc).
+ *
+ * - We're doing 2 reads per loop (LSB, MSB), and we expect
+ * them each to take about a microsecond on real hardware.
+ * So we expect a count value of around 100. But we'll be
+ * generous, and accept anything over 50.
+ *
+ * - if the PIT is stuck, and we see *many* more reads, we
+ * return early (and the next caller of pit_expect_msb()
+ * then consider it a failure when they don't see the
+ * next expected value).
+ *
+ * These expectations mean that we know that we have seen the
+ * transition from one expected value to another with a fairly
+ * high accuracy, and we didn't miss any events. We can thus
+ * use the TSC value at the transitions to calculate a pretty
+ * good value for the TSC frequencty.
+ */
+static inline int pit_expect_msb(unsigned char val)
+{
+ int count = 0;
+ for (count = 0; count < 50000; count++) {
+ /* Ignore LSB */
+ inb(0x42);
+ if (inb(0x42) != val)
+ break;
+ }
+ return count > 50;
+}
+
+/*
+ * How many MSB values do we want to see? We aim for a
+ * 15ms calibration, which assuming a 2us counter read
+ * error should give us roughly 150 ppm precision for
+ * the calibration.
+ */
+#define QUICK_PIT_MS 15
+#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+
+static unsigned long quick_pit_calibrate(void)
+{
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Counter 2, mode 0 (one-shot), binary count
+ *
+ * NOTE! Mode 2 decrements by two (and then the
+ * output is flipped each time, giving the same
+ * final output frequency as a decrement-by-one),
+ * so mode 0 is much better when looking at the
+ * individual counts.
+ */
+ outb(0xb0, 0x43);
+
+ /* Start at 0xffff */
+ outb(0xff, 0x42);
+ outb(0xff, 0x42);
+
+ if (pit_expect_msb(0xff)) {
+ int i;
+ u64 t1, t2, delta;
+ unsigned char expect = 0xfe;
+
+ t1 = get_cycles();
+ for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
+ if (!pit_expect_msb(expect))
+ goto failed;
+ }
+ t2 = get_cycles();
+
+ /*
+ * Make sure we can rely on the second TSC timestamp:
+ */
+ if (!pit_expect_msb(expect))
+ goto failed;
+
+ /*
+ * Ok, if we get here, then we've seen the
+ * MSB of the PIT decrement QUICK_PIT_ITERATIONS
+ * times, and each MSB had many hits, so we never
+ * had any sudden jumps.
+ *
+ * As a result, we can depend on there not being
+ * any odd delays anywhere, and the TSC reads are
+ * reliable.
+ *
+ * kHz = ticks / time-in-seconds / 1000;
+ * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
+ * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
+ */
+ delta = (t2 - t1)*PIT_TICK_RATE;
+ do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
+ printk("Fast TSC calibration using PIT\n");
+ return delta;
+ }
+failed:
+ return 0;
+}
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+ u64 tsc1, tsc2, delta, ref1, ref2;
+ unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
+ unsigned long flags, latch, ms, fast_calibrate;
+ int hpet = is_hpet_enabled(), i, loopmin;
+
+ local_irq_save(flags);
+ fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
+ if (fast_calibrate)
+ return fast_calibrate;
/*
- * Preset the result with the raw and inaccurate PIT
- * calibration value
+ * Run 5 calibration loops to get the lowest frequency value
+ * (the best estimate). We use two different calibration modes
+ * here:
+ *
+ * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
+ * load a timeout of 50ms. We read the time right after we
+ * started the timer and wait until the PIT count down reaches
+ * zero. In each wait loop iteration we read the TSC and check
+ * the delta to the previous read. We keep track of the min
+ * and max values of that delta. The delta is mostly defined
+ * by the IO time of the PIT access, so we can detect when a
+ * SMI/SMM disturbance happend between the two reads. If the
+ * maximum time is significantly larger than the minimum time,
+ * then we discard the result and have another try.
+ *
+ * 2) Reference counter. If available we use the HPET or the
+ * PMTIMER as a reference to check the sanity of that value.
+ * We use separate TSC readouts and check inside of the
+ * reference read for a SMI/SMM disturbance. We dicard
+ * disturbed values here as well. We do that around the PIT
+ * calibration delay loop as we have to wait for a certain
+ * amount of time anyway.
*/
- delta = (tr2 - tr1);
- do_div(delta, 50);
- tsc_khz_val = delta;
-
- /* hpet or pmtimer available ? */
- if (!hpet && !pm1 && !pm2) {
- printk(KERN_INFO "TSC calibrated against PIT\n");
- goto out;
+
+ /* Preset PIT loop values */
+ latch = CAL_LATCH;
+ ms = CAL_MS;
+ loopmin = CAL_PIT_LOOPS;
+
+ for (i = 0; i < 3; i++) {
+ unsigned long tsc_pit_khz;
+
+ /*
+ * Read the start value and the reference count of
+ * hpet/pmtimer when available. Then do the PIT
+ * calibration, which will take at least 50ms, and
+ * read the end value.
+ */
+ local_irq_save(flags);
+ tsc1 = tsc_read_refs(&ref1, hpet);
+ tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
+ tsc2 = tsc_read_refs(&ref2, hpet);
+ local_irq_restore(flags);
+
+ /* Pick the lowest PIT TSC calibration so far */
+ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
+
+ /* hpet or pmtimer available ? */
+ if (!hpet && !ref1 && !ref2)
+ continue;
+
+ /* Check, whether the sampling was disturbed by an SMI */
+ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
+ continue;
+
+ tsc2 = (tsc2 - tsc1) * 1000000LL;
+ if (hpet)
+ tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
+ else
+ tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
+
+ tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+
+ /* Check the reference deviation */
+ delta = ((u64) tsc_pit_min) * 100;
+ do_div(delta, tsc_ref_min);
+
+ /*
+ * If both calibration results are inside a 10% window
+ * then we can be sure, that the calibration
+ * succeeded. We break out of the loop right away. We
+ * use the reference value, as it is more precise.
+ */
+ if (delta >= 90 && delta <= 110) {
+ printk(KERN_INFO
+ "TSC: PIT calibration matches %s. %d loops\n",
+ hpet ? "HPET" : "PMTIMER", i + 1);
+ return tsc_ref_min;
+ }
+
+ /*
+ * Check whether PIT failed more than once. This
+ * happens in virtualized environments. We need to
+ * give the virtual PC a slightly longer timeframe for
+ * the HPET/PMTIMER to make the result precise.
+ */
+ if (i == 1 && tsc_pit_min == ULONG_MAX) {
+ latch = CAL2_LATCH;
+ ms = CAL2_MS;
+ loopmin = CAL2_PIT_LOOPS;
+ }
}
- /* Check, whether the sampling was disturbed by an SMI */
- if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
- printk(KERN_WARNING "TSC calibration disturbed by SMI, "
- "using PIT calibration result\n");
- goto out;
+ /*
+ * Now check the results.
+ */
+ if (tsc_pit_min == ULONG_MAX) {
+ /* PIT gave no useful value */
+ printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+
+ /* We don't have an alternative source, disable TSC */
+ if (!hpet && !ref1 && !ref2) {
+ printk("TSC: No reference (HPET/PMTIMER) available\n");
+ return 0;
+ }
+
+ /* The alternative source failed as well, disable TSC */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
+ "failed.\n");
+ return 0;
+ }
+
+ /* Use the alternative source */
+ printk(KERN_INFO "TSC: using %s reference calibration\n",
+ hpet ? "HPET" : "PMTIMER");
+
+ return tsc_ref_min;
}
- tsc2 = (tsc2 - tsc1) * 1000000LL;
-
- if (hpet) {
- printk(KERN_INFO "TSC calibrated against HPET\n");
- if (hpet2 < hpet1)
- hpet2 += 0x100000000ULL;
- hpet2 -= hpet1;
- tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
- do_div(tsc1, 1000000);
- } else {
- printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
- if (pm2 < pm1)
- pm2 += (u64)ACPI_PM_OVRRUN;
- pm2 -= pm1;
- tsc1 = pm2 * 1000000000LL;
- do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ /* We don't have an alternative source, use the PIT calibration value */
+ if (!hpet && !ref1 && !ref2) {
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
- do_div(tsc2, tsc1);
- tsc_khz_val = tsc2;
+ /* The alternative source failed, use the PIT calibration value */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
+ "Using PIT calibration\n");
+ return tsc_pit_min;
+ }
-out:
- return tsc_khz_val;
+ /*
+ * The calibration values differ too much. In doubt, we use
+ * the PIT value as we know that there are PMTIMERs around
+ * running at double speed. At least we let the user know:
+ */
+ printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+ hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
-
#ifdef CONFIG_X86_32
/* Only called from the Powernow K7 cpu freq driver */
int recalibrate_cpu_khz(void)
@@ -314,7 +624,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
mark_tsc_unstable("cpufreq changes");
}
- set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
+ set_cyc2ns_scale(tsc_khz, freq->cpu);
return 0;
}
@@ -325,6 +635,10 @@ static struct notifier_block time_cpufreq_notifier_block = {
static int __init cpufreq_tsc(void)
{
+ if (!cpu_has_tsc)
+ return 0;
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return 0;
cpufreq_register_notifier(&time_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
return 0;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0577825cf89..9ffb01c31c4 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void)
__raw_spin_unlock(&sync_lock);
}
}
- if (!(now-start)) {
- printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
+ WARN(!(now-start),
+ "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
now-start, end-start);
- WARN_ON(1);
- }
}
/*
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 41e01b145c4..61a97e616f7 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -25,45 +25,31 @@
#include <asm/visws/cobalt.h>
#include <asm/visws/piix4.h>
#include <asm/arch_hooks.h>
+#include <asm/io_apic.h>
#include <asm/fixmap.h>
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/e820.h>
-#include <asm/smp.h>
#include <asm/io.h>
#include <mach_ipi.h>
#include "mach_apic.h"
-#include <linux/init.h>
-#include <linux/smp.h>
-
#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/apic.h>
#include <asm/i8259.h>
#include <asm/irq_vectors.h>
-#include <asm/visws/cobalt.h>
#include <asm/visws/lithium.h>
-#include <asm/visws/piix4.h>
#include <linux/sched.h>
#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
extern int no_broadcast;
-#include <asm/io.h>
#include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
char visws_board_type = -1;
char visws_board_rev = -1;
@@ -184,8 +170,6 @@ static int __init visws_get_smp_config(unsigned int early)
return 1;
}
-extern unsigned int __cpuinitdata maxcpus;
-
/*
* The Visual Workstation is Intel MP compliant in the hardware
* sense, but it doesn't have a BIOS(-configuration table).
@@ -244,8 +228,8 @@ static int __init visws_find_smp_config(unsigned int reserve)
ncpus = CO_CPU_MAX;
}
- if (ncpus > maxcpus)
- ncpus = maxcpus;
+ if (ncpus > setup_max_cpus)
+ ncpus = setup_max_cpus;
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 61531d5c950..8b6c393ab9f 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
u32 *ldt_entry = (u32 *)desc;
- vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
+ vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}
static void vmi_load_sp0(struct tss_struct *tss,
@@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
-static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
{
vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
-static void vmi_release_pte(u32 pfn)
+static void vmi_release_pte(unsigned long pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
-static void vmi_release_pmd(u32 pfn)
+static void vmi_release_pmd(unsigned long pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index cdb2363697d..af5bdad8460 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -209,3 +209,11 @@ SECTIONS
DWARF_DEBUG
}
+
+#ifdef CONFIG_KEXEC
+/* Link time checks */
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 0c029e8959c..7766d36983f 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void)
native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
}
-static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
switch (type) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0bfe2bd305e..3da2508eb22 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -711,6 +711,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
u64 *spte;
int young = 0;
+ /* always return old for EPT */
+ if (!shadow_accessed_mask)
+ return 0;
+
spte = rmap_next(kvm, rmapp, NULL);
while (spte) {
int _young;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f72ac1fa35f..4a814bff21f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -345,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
shadow_addr = __pa(shadow_page->spt);
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
- *shadow_ent = shadow_pte;
+ set_shadow_pte(shadow_ent, shadow_pte);
}
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e2ee264740c..8233b86c778 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -62,6 +62,7 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
static void kvm_reput_irq(struct vcpu_svm *svm);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
@@ -878,6 +879,10 @@ set:
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+ unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+
+ if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
+ force_new_asid(vcpu);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -1027,6 +1032,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
(u32)fault_address, (u32)(fault_address >> 32),
handler);
+ /*
+ * FIXME: Tis shouldn't be necessary here, but there is a flush
+ * missing in the MMU code. Until we find this bug, flush the
+ * complete TLB here on an NPF
+ */
+ if (npt_enabled)
+ svm_flush_tlb(&svm->vcpu);
if (event_injection)
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2a69773e3b2..7041cc52b56 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3301,8 +3301,7 @@ static int __init vmx_init(void)
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
- kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
- VMX_EPT_FAKE_DIRTY_MASK, 0ull,
+ kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
} else
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 425a13436b3..23e8373507a 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -370,8 +370,6 @@ enum vmcs_field {
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
-#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
-#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index d5a2b39f882..321cf720dbb 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -16,36 +16,46 @@ static void __rdmsr_on_cpu(void *info)
rdmsr(rv->msr_no, rv->l, rv->h);
}
-static void __rdmsr_safe_on_cpu(void *info)
+static void __wrmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
- rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
+ wrmsr(rv->msr_no, rv->l, rv->h);
}
-static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
- int err = 0;
+ int err;
struct msr_info rv;
rv.msr_no = msr_no;
- if (safe) {
- smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- err = rv.err;
- } else {
- smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
- }
+ err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
*l = rv.l;
*h = rv.h;
return err;
}
-static void __wrmsr_on_cpu(void *info)
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ int err;
+ struct msr_info rv;
+
+ rv.msr_no = msr_no;
+ rv.l = l;
+ rv.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+ return err;
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+ may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
{
struct msr_info *rv = info;
- wrmsr(rv->msr_no, rv->l, rv->h);
+ rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
}
static void __wrmsr_safe_on_cpu(void *info)
@@ -55,44 +65,30 @@ static void __wrmsr_safe_on_cpu(void *info)
rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
}
-static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
- int err = 0;
+ int err;
struct msr_info rv;
rv.msr_no = msr_no;
- rv.l = l;
- rv.h = h;
- if (safe) {
- smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
- err = rv.err;
- } else {
- smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
- }
-
- return err;
-}
-
-void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
+ err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+ *l = rv.l;
+ *h = rv.h;
-void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
+ return err ? err : rv.err;
}
-/* These "safe" variants are slower and should be used when the target MSR
- may not actually exist. */
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
- return _wrmsr_on_cpu(cpu, msr_no, l, h, 1);
-}
+ int err;
+ struct msr_info rv;
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- return _rdmsr_on_cpu(cpu, msr_no, l, h, 1);
+ rv.msr_no = msr_no;
+ rv.l = l;
+ rv.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
}
EXPORT_SYMBOL(rdmsr_on_cpu);
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 94972e7c094..82004d2bf05 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src)
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (src), "1" (dest) : "memory");
+ : "0" (src), "1" (dest) : "memory");
return dest;
}
EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count)
"stosb\n"
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- :"0" (src), "1" (dest), "2" (count) : "memory");
+ : "0" (src), "1" (dest), "2" (count) : "memory");
return dest;
}
EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src)
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
+ : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
return dest;
}
EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct)
"2:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"3:"
- :"=a" (res), "=&S" (d0), "=&D" (d1)
- :"1" (cs), "2" (ct)
- :"memory");
+ : "=a" (res), "=&S" (d0), "=&D" (d1)
+ : "1" (cs), "2" (ct)
+ : "memory");
return res;
}
EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count)
"3:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"4:"
- :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"1" (cs), "2" (ct), "3" (count)
- :"memory");
+ : "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+ : "1" (cs), "2" (ct), "3" (count)
+ : "memory");
return res;
}
EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@ char *strchr(const char *s, int c)
"movl $1,%1\n"
"2:\tmovl %1,%0\n\t"
"decl %0"
- :"=a" (res), "=&S" (d0)
- :"1" (s), "0" (c)
- :"memory");
+ : "=a" (res), "=&S" (d0)
+ : "1" (s), "0" (c)
+ : "memory");
return res;
}
EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@ size_t strlen(const char *s)
"scasb\n\t"
"notl %0\n\t"
"decl %0"
- :"=c" (res), "=&D" (d0)
- :"1" (s), "a" (0), "0" (0xffffffffu)
- :"memory");
+ : "=c" (res), "=&D" (d0)
+ : "1" (s), "a" (0), "0" (0xffffffffu)
+ : "memory");
return res;
}
EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count)
"je 1f\n\t"
"movl $1,%0\n"
"1:\tdecl %0"
- :"=D" (res), "=&c" (d0)
- :"a" (c), "0" (cs), "1" (count)
- :"memory");
+ : "=D" (res), "=&c" (d0)
+ : "a" (c), "0" (cs), "1" (count)
+ : "memory");
return res;
}
EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count)
"cmpl $-1,%1\n\t"
"jne 1b\n"
"3:\tsubl %2,%0"
- :"=a" (res), "=&d" (d0)
- :"c" (s), "1" (count)
- :"memory");
+ : "=a" (res), "=&d" (d0)
+ : "c" (s), "1" (count)
+ : "memory");
return res;
}
EXPORT_SYMBOL(strnlen);
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index 42e8a50303f..8e2d55f754b 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c
@@ -23,9 +23,9 @@ __asm__ __volatile__(
"jne 1b\n\t"
"xorl %%eax,%%eax\n\t"
"2:"
- :"=a" (__res), "=&c" (d0), "=&S" (d1)
- :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
- :"dx", "di");
+ : "=a" (__res), "=&c" (d0), "=&S" (d1)
+ : "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+ : "dx", "di");
return __res;
}
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
index a037041817c..4f4e50c3ad3 100644
--- a/arch/x86/mach-rdc321x/platform.c
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -25,7 +25,6 @@
#include <linux/list.h>
#include <linux/device.h>
#include <linux/platform_device.h>
-#include <linux/version.h>
#include <linux/leds.h>
#include <asm/gpio.h>
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 62fa440678d..847c164725f 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn,
get_memcfg_numa();
- kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
+ kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
do {
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a20d1fa64b4..e7277cbcfb4 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
- cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
+ prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
+ cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
if (!st->level) {
/* First entry */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4974e97dedf..c3789bb1930 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -195,11 +195,30 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
- unsigned pages_2m = 0, pages_4k = 0;
+ unsigned pages_2m, pages_4k;
+ int mapping_iter;
+
+ /*
+ * First iteration will setup identity mapping using large/small pages
+ * based on use_pse, with other attributes same as set by
+ * the early code in head_32.S
+ *
+ * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
+ * as desired for the kernel identity mapping.
+ *
+ * This two pass mechanism conforms to the TLB app note which says:
+ *
+ * "Software should not write to a paging-structure entry in a way
+ * that would change, for any linear address, both the page size
+ * and either the page frame or attributes."
+ */
+ mapping_iter = 1;
if (!cpu_has_pse)
use_pse = 0;
+repeat:
+ pages_2m = pages_4k = 0;
pfn = start_pfn;
pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
pgd = pgd_base + pgd_idx;
@@ -225,6 +244,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
if (use_pse) {
unsigned int addr2;
pgprot_t prot = PAGE_KERNEL_LARGE;
+ /*
+ * first pass will use the same initial
+ * identity mapping attribute + _PAGE_PSE.
+ */
+ pgprot_t init_prot =
+ __pgprot(PTE_IDENT_ATTR |
+ _PAGE_PSE);
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
PAGE_OFFSET + PAGE_SIZE-1;
@@ -234,7 +260,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
prot = PAGE_KERNEL_LARGE_EXEC;
pages_2m++;
- set_pmd(pmd, pfn_pmd(pfn, prot));
+ if (mapping_iter == 1)
+ set_pmd(pmd, pfn_pmd(pfn, init_prot));
+ else
+ set_pmd(pmd, pfn_pmd(pfn, prot));
pfn += PTRS_PER_PTE;
continue;
@@ -246,17 +275,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
pgprot_t prot = PAGE_KERNEL;
+ /*
+ * first pass will use the same initial
+ * identity mapping attribute.
+ */
+ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
if (is_kernel_text(addr))
prot = PAGE_KERNEL_EXEC;
pages_4k++;
- set_pte(pte, pfn_pte(pfn, prot));
+ if (mapping_iter == 1)
+ set_pte(pte, pfn_pte(pfn, init_prot));
+ else
+ set_pte(pte, pfn_pte(pfn, prot));
}
}
}
- update_page_count(PG_LEVEL_2M, pages_2m);
- update_page_count(PG_LEVEL_4K, pages_4k);
+ if (mapping_iter == 1) {
+ /*
+ * update direct mapping page count only in the first
+ * iteration.
+ */
+ update_page_count(PG_LEVEL_2M, pages_2m);
+ update_page_count(PG_LEVEL_4K, pages_4k);
+
+ /*
+ * local global flush tlb, which will flush the previous
+ * mappings present in both small and large page TLB's.
+ */
+ __flush_tlb_all();
+
+ /*
+ * Second iteration will set the actual desired PTE attributes.
+ */
+ mapping_iter = 2;
+ goto repeat;
+ }
}
/*
@@ -459,11 +514,7 @@ static void __init pagetable_init(void)
{
pgd_t *pgd_base = swapper_pg_dir;
- paravirt_pagetable_setup_start(pgd_base);
-
permanent_kmaps_init(pgd_base);
-
- paravirt_pagetable_setup_done(pgd_base);
}
#ifdef CONFIG_ACPI_SLEEP
@@ -723,7 +774,7 @@ void __init setup_bootmem_allocator(void)
after_init_bootmem = 1;
}
-static void __init find_early_table_space(unsigned long end)
+static void __init find_early_table_space(unsigned long end, int use_pse)
{
unsigned long puds, pmds, ptes, tables, start;
@@ -733,7 +784,7 @@ static void __init find_early_table_space(unsigned long end)
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
- if (cpu_has_pse) {
+ if (use_pse) {
unsigned long extra;
extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
@@ -773,12 +824,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
pgd_t *pgd_base = swapper_pg_dir;
unsigned long start_pfn, end_pfn;
unsigned long big_page_start;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * This will simplify cpa(), which otherwise needs to support splitting
+ * large pages into small in interrupt context, etc.
+ */
+ int use_pse = 0;
+#else
+ int use_pse = cpu_has_pse;
+#endif
/*
* Find space for the kernel direct mapping tables.
*/
if (!after_init_bootmem)
- find_early_table_space(end);
+ find_early_table_space(end, use_pse);
#ifdef CONFIG_X86_PAE
set_nx();
@@ -824,7 +885,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
if (start_pfn < end_pfn)
kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
- cpu_has_pse);
+ use_pse);
/* tail is not big page alignment ? */
start_pfn = end_pfn;
@@ -987,7 +1048,6 @@ void __init mem_init(void)
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
- cpa_init();
save_pg_dir();
zap_low_mappings();
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 129618ca0ea..fb30486c82f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-int direct_gbpages __meminitdata
+int direct_gbpages
#ifdef CONFIG_DIRECT_GBPAGES
= 1
#endif
@@ -88,7 +88,11 @@ early_param("gbpages", parse_direct_gbpages_on);
int after_bootmem;
-static __init void *spp_getpage(void)
+/*
+ * NOTE: This function is marked __ref because it calls __init function
+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
+ */
+static __ref void *spp_getpage(void)
{
void *ptr;
@@ -221,7 +225,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
void __init cleanup_highmap(void)
{
unsigned long vaddr = __START_KERNEL_map;
- unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+ unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt;
pmd_t *last_pmd = pmd + PTRS_PER_PMD;
@@ -237,7 +241,7 @@ static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;
static unsigned long __meminitdata table_top;
-static __meminit void *alloc_low_page(unsigned long *phys)
+static __ref void *alloc_low_page(unsigned long *phys)
{
unsigned long pfn = table_end++;
void *adr;
@@ -258,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys)
return adr;
}
-static __meminit void unmap_low_page(void *adr)
+static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
@@ -267,7 +271,8 @@ static __meminit void unmap_low_page(void *adr)
}
static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+ pgprot_t prot)
{
unsigned pages = 0;
unsigned long last_map_addr = end;
@@ -285,32 +290,40 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
break;
}
+ /*
+ * We will re-use the existing mapping.
+ * Xen for example has some special requirements, like mapping
+ * pagetable pages as RO. So assume someone who pre-setup
+ * these mappings are more intelligent.
+ */
if (pte_val(*pte))
continue;
if (0)
printk(" pte=%p addr=%lx pte=%016lx\n",
pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
- set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
- last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
pages++;
+ set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
+ last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
}
+
update_page_count(PG_LEVEL_4K, pages);
return last_map_addr;
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+ pgprot_t prot)
{
pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
- return phys_pte_init(pte, address, end);
+ return phys_pte_init(pte, address, end, prot);
}
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, pgprot_t prot)
{
unsigned long pages = 0;
unsigned long last_map_addr = end;
@@ -321,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long pte_phys;
pmd_t *pmd = pmd_page + pmd_index(address);
pte_t *pte;
+ pgprot_t new_prot = prot;
if (address >= end) {
if (!after_bootmem) {
@@ -331,25 +345,48 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
if (pmd_val(*pmd)) {
- if (!pmd_large(*pmd))
+ if (!pmd_large(*pmd)) {
+ spin_lock(&init_mm.page_table_lock);
last_map_addr = phys_pte_update(pmd, address,
- end);
- continue;
+ end, prot);
+ spin_unlock(&init_mm.page_table_lock);
+ continue;
+ }
+ /*
+ * If we are ok with PG_LEVEL_2M mapping, then we will
+ * use the existing mapping,
+ *
+ * Otherwise, we will split the large page mapping but
+ * use the same existing protection bits except for
+ * large page, so that we don't violate Intel's TLB
+ * Application note (317080) which says, while changing
+ * the page sizes, new and old translations should
+ * not differ with respect to page frame and
+ * attributes.
+ */
+ if (page_size_mask & (1 << PG_LEVEL_2M))
+ continue;
+ new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
}
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
+ spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pmd,
- pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ pfn_pte(address >> PAGE_SHIFT,
+ __pgprot(pgprot_val(prot) | _PAGE_PSE)));
+ spin_unlock(&init_mm.page_table_lock);
last_map_addr = (address & PMD_MASK) + PMD_SIZE;
continue;
}
pte = alloc_low_page(&pte_phys);
- last_map_addr = phys_pte_init(pte, address, end);
+ last_map_addr = phys_pte_init(pte, address, end, new_prot);
unmap_low_page(pte);
+ spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+ spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
return last_map_addr;
@@ -357,14 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
static unsigned long __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, pgprot_t prot)
{
pmd_t *pmd = pmd_offset(pud, 0);
unsigned long last_map_addr;
- spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
- spin_unlock(&init_mm.page_table_lock);
+ last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
__flush_tlb_all();
return last_map_addr;
}
@@ -381,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long pmd_phys;
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ pgprot_t prot = PAGE_KERNEL;
if (addr >= end)
break;
@@ -392,30 +428,49 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
}
if (pud_val(*pud)) {
- if (!pud_large(*pud))
+ if (!pud_large(*pud)) {
last_map_addr = phys_pmd_update(pud, addr, end,
- page_size_mask);
- continue;
+ page_size_mask, prot);
+ continue;
+ }
+ /*
+ * If we are ok with PG_LEVEL_1G mapping, then we will
+ * use the existing mapping.
+ *
+ * Otherwise, we will split the gbpage mapping but use
+ * the same existing protection bits except for large
+ * page, so that we don't violate Intel's TLB
+ * Application note (317080) which says, while changing
+ * the page sizes, new and old translations should
+ * not differ with respect to page frame and
+ * attributes.
+ */
+ if (page_size_mask & (1 << PG_LEVEL_1G))
+ continue;
+ prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
}
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
+ spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ spin_unlock(&init_mm.page_table_lock);
last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
continue;
}
pmd = alloc_low_page(&pmd_phys);
+ last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
+ prot);
+ unmap_low_page(pmd);
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
- unmap_low_page(pmd);
pud_populate(&init_mm, pud, __va(pmd_phys));
spin_unlock(&init_mm.page_table_lock);
-
}
__flush_tlb_all();
+
update_page_count(PG_LEVEL_1G, pages);
return last_map_addr;
@@ -432,27 +487,28 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
return phys_pud_init(pud, addr, end, page_size_mask);
}
-static void __init find_early_table_space(unsigned long end)
+static void __init find_early_table_space(unsigned long end, int use_pse,
+ int use_gbpages)
{
unsigned long puds, pmds, ptes, tables, start;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
- if (direct_gbpages) {
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ if (use_gbpages) {
unsigned long extra;
extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
} else
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
- if (cpu_has_pse) {
+ if (use_pse) {
unsigned long extra;
extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
} else
ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
- tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
/*
* RED-PEN putting page tables only on node 0 could
@@ -505,17 +561,16 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
continue;
}
- if (after_bootmem)
- pud = pud_offset(pgd, start & PGDIR_MASK);
- else
- pud = alloc_low_page(&pud_phys);
-
+ pud = alloc_low_page(&pud_phys);
last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
page_size_mask);
unmap_low_page(pud);
- pgd_populate(&init_mm, pgd_offset_k(start),
- __va(pud_phys));
+
+ spin_lock(&init_mm.page_table_lock);
+ pgd_populate(&init_mm, pgd, __va(pud_phys));
+ spin_unlock(&init_mm.page_table_lock);
}
+ __flush_tlb_all();
return last_map_addr;
}
@@ -559,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
struct map_range mr[NR_RANGE_MR];
int nr_range, i;
+ int use_pse, use_gbpages;
printk(KERN_INFO "init_memory_mapping\n");
@@ -572,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
if (!after_bootmem)
init_gbpages();
- if (direct_gbpages)
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * This will simplify cpa(), which otherwise needs to support splitting
+ * large pages into small in interrupt context, etc.
+ */
+ use_pse = use_gbpages = 0;
+#else
+ use_pse = cpu_has_pse;
+ use_gbpages = direct_gbpages;
+#endif
+
+ if (use_gbpages)
page_size_mask |= 1 << PG_LEVEL_1G;
- if (cpu_has_pse)
+ if (use_pse)
page_size_mask |= 1 << PG_LEVEL_2M;
memset(mr, 0, sizeof(mr));
@@ -635,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
if (!after_bootmem)
- find_early_table_space(end);
+ find_early_table_space(end, use_pse, use_gbpages);
for (i = 0; i < nr_range; i++)
last_map_addr = kernel_physical_mapping_init(
@@ -794,8 +862,6 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
-
- cpa_init();
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index fba57be9af6..6ab3196d12b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -83,6 +83,25 @@ int page_is_ram(unsigned long pagenr)
return 0;
}
+int pagerange_is_ram(unsigned long start, unsigned long end)
+{
+ int ram_page = 0, not_rampage = 0;
+ unsigned long page_nr;
+
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+ ++page_nr) {
+ if (page_is_ram(page_nr))
+ ram_page = 1;
+ else
+ not_rampage = 1;
+
+ if (ram_page == not_rampage)
+ return -1;
+ }
+
+ return ram_page;
+}
+
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -170,7 +189,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
- retval = reserve_memtype(phys_addr, phys_addr + size,
+ retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
prot_val, &new_prot_val);
if (retval) {
pr_debug("Warning: reserve_memtype returned %d\n", retval);
@@ -553,13 +572,11 @@ static int __init check_early_ioremap_leak(void)
{
if (!early_ioremap_nested)
return 0;
-
- printk(KERN_WARNING
+ WARN(1, KERN_WARNING
"Debug warning: early ioremap leak of %d areas detected.\n",
- early_ioremap_nested);
+ early_ioremap_nested);
printk(KERN_WARNING
- "please boot with early_ioremap_debug and report the dmesg.\n");
- WARN_ON(1);
+ "please boot with early_ioremap_debug and report the dmesg.\n");
return 1;
}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index e7397e108be..635b50e8558 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -430,7 +430,9 @@ static void enter_uniprocessor(void)
"may miss events.\n");
}
-static void leave_uniprocessor(void)
+/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
+ but this whole function is ifdefed CONFIG_HOTPLUG_CPU */
+static void __ref leave_uniprocessor(void)
{
int cpu;
int err;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a4dd793d600..cebcbf152d4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -79,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
return 0;
addr = 0x8000;
- nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
+ nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == -1UL) {
@@ -176,10 +176,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
unsigned long bootmap_start, nodedata_phys;
void *bootmap;
- const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+ const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
int nid;
- start = round_up(start, ZONE_ALIGN);
+ start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
start, end);
@@ -210,9 +210,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
nid = phys_to_nid(nodedata_phys);
if (nid == nodeid)
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+ bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
else
- bootmap_start = round_up(start, PAGE_SIZE);
+ bootmap_start = roundup(start, PAGE_SIZE);
/*
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dcd42eb94e..e1d10690921 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -32,7 +32,7 @@ enum {
GPS = (1<<30)
};
-#define PAGE_TESTBIT __pgprot(_PAGE_UNUSED1)
+#define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST)
static int pte_testbit(pte_t pte)
{
@@ -118,6 +118,7 @@ static int pageattr_test(void)
unsigned int level;
int i, k;
int err;
+ unsigned long test_addr;
if (print)
printk(KERN_INFO "CPA self-test:\n");
@@ -172,7 +173,8 @@ static int pageattr_test(void)
continue;
}
- err = change_page_attr_set(addr[i], len[i], PAGE_TESTBIT);
+ test_addr = addr[i];
+ err = change_page_attr_set(&test_addr, len[i], PAGE_CPA_TEST, 0);
if (err < 0) {
printk(KERN_ERR "CPA %d failed %d\n", i, err);
failed++;
@@ -204,7 +206,8 @@ static int pageattr_test(void)
failed++;
continue;
}
- err = change_page_attr_clear(addr[i], len[i], PAGE_TESTBIT);
+ test_addr = addr[i];
+ err = change_page_attr_clear(&test_addr, len[i], PAGE_CPA_TEST, 0);
if (err < 0) {
printk(KERN_ERR "CPA reverting failed: %d\n", err);
failed++;
@@ -221,8 +224,7 @@ static int pageattr_test(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "NOT PASSED. Please report.\n");
- WARN_ON(1);
+ WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
return -EINVAL;
} else {
if (print)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf05..a9ec89c3fbc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,15 +25,27 @@
* The current flushing context - we pass it instead of 5 arguments:
*/
struct cpa_data {
- unsigned long vaddr;
+ unsigned long *vaddr;
pgprot_t mask_set;
pgprot_t mask_clr;
int numpages;
- int flushtlb;
+ int flags;
unsigned long pfn;
unsigned force_split : 1;
+ int curpage;
};
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
+ * entries change the page attribute in parallel to some other cpu
+ * splitting a large page entry along with changing the attribute.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+
#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -55,13 +67,19 @@ static void split_page_count(int level)
int arch_report_meminfo(char *page)
{
- int n = sprintf(page, "DirectMap4k: %8lu\n"
- "DirectMap2M: %8lu\n",
- direct_pages_count[PG_LEVEL_4K],
- direct_pages_count[PG_LEVEL_2M]);
+ int n = sprintf(page, "DirectMap4k: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+ n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
#ifdef CONFIG_X86_64
- n += sprintf(page + n, "DirectMap1G: %8lu\n",
- direct_pages_count[PG_LEVEL_1G]);
+ if (direct_gbpages)
+ n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_1G] << 20);
#endif
return n;
}
@@ -78,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
static inline unsigned long highmap_end_pfn(void)
{
- return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+ return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
}
#endif
@@ -184,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
}
}
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+{
+ unsigned int i, level;
+ unsigned long *addr;
+
+ BUG_ON(irqs_disabled());
+
+ on_each_cpu(__cpa_flush_range, NULL, 1);
+
+ if (!cache)
+ return;
+
+ /* 4M threshold */
+ if (numpages >= 1024) {
+ if (boot_cpu_data.x86_model >= 4)
+ wbinvd();
+ return;
+ }
+ /*
+ * We only need to flush on one CPU,
+ * clflush is a MESI-coherent instruction that
+ * will cause all other CPUs to flush the same
+ * cachelines:
+ */
+ for (i = 0, addr = start; i < numpages; i++, addr++) {
+ pte_t *pte = lookup_address(*addr, &level);
+
+ /*
+ * Only flush present addresses:
+ */
+ if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+ clflush_cache_range((void *) *addr, PAGE_SIZE);
+ }
+}
+
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
@@ -392,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
*/
new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
__set_pmd_pte(kpte, address, new_pte);
- cpa->flushtlb = 1;
+ cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
}
@@ -402,84 +455,6 @@ out_unlock:
return do_split;
}
-static LIST_HEAD(page_pool);
-static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed;
-
-static void cpa_fill_pool(struct page **ret)
-{
- gfp_t gfp = GFP_KERNEL;
- unsigned long flags;
- struct page *p;
-
- /*
- * Avoid recursion (on debug-pagealloc) and also signal
- * our priority to get to these pagetables:
- */
- if (current->flags & PF_MEMALLOC)
- return;
- current->flags |= PF_MEMALLOC;
-
- /*
- * Allocate atomically from atomic contexts:
- */
- if (in_atomic() || irqs_disabled() || debug_pagealloc)
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-
- while (pool_pages < pool_size || (ret && !*ret)) {
- p = alloc_pages(gfp, 0);
- if (!p) {
- pool_failed++;
- break;
- }
- /*
- * If the call site needs a page right now, provide it:
- */
- if (ret && !*ret) {
- *ret = p;
- continue;
- }
- spin_lock_irqsave(&pgd_lock, flags);
- list_add(&p->lru, &page_pool);
- pool_pages++;
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-
- current->flags &= ~PF_MEMALLOC;
-}
-
-#define SHIFT_MB (20 - PAGE_SHIFT)
-#define ROUND_MB_GB ((1 << 10) - 1)
-#define SHIFT_MB_GB 10
-#define POOL_PAGES_PER_GB 16
-
-void __init cpa_init(void)
-{
- struct sysinfo si;
- unsigned long gb;
-
- si_meminfo(&si);
- /*
- * Calculate the number of pool pages:
- *
- * Convert totalram (nr of pages) to MiB and round to the next
- * GiB. Shift MiB to Gib and multiply the result by
- * POOL_PAGES_PER_GB:
- */
- if (debug_pagealloc) {
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- pool_size = POOL_PAGES_PER_GB * gb;
- } else {
- pool_size = 1;
- }
- pool_low = pool_size;
-
- cpa_fill_pool(NULL);
- printk(KERN_DEBUG
- "CPA: page pool initialized %lu of %lu pages preallocated\n",
- pool_pages, pool_size);
-}
-
static int split_large_page(pte_t *kpte, unsigned long address)
{
unsigned long flags, pfn, pfninc = 1;
@@ -488,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address)
pgprot_t ref_prot;
struct page *base;
- /*
- * Get a page from the pool. The pool list is protected by the
- * pgd_lock, which we have to take anyway for the split
- * operation:
- */
- spin_lock_irqsave(&pgd_lock, flags);
- if (list_empty(&page_pool)) {
- spin_unlock_irqrestore(&pgd_lock, flags);
- base = NULL;
- cpa_fill_pool(&base);
- if (!base)
- return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
- } else {
- base = list_first_entry(&page_pool, struct page, lru);
- list_del(&base->lru);
- pool_pages--;
-
- if (pool_pages < pool_low)
- pool_low = pool_pages;
- }
+ if (!debug_pagealloc)
+ spin_unlock(&cpa_lock);
+ base = alloc_pages(GFP_KERNEL, 0);
+ if (!debug_pagealloc)
+ spin_lock(&cpa_lock);
+ if (!base)
+ return -ENOMEM;
+ spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -566,11 +528,8 @@ out_unlock:
* If we dropped out via the lookup_address check under
* pgd_lock then stick the page back into the pool:
*/
- if (base) {
- list_add(&base->lru, &page_pool);
- pool_pages++;
- } else
- pool_used++;
+ if (base)
+ __free_page(base);
spin_unlock_irqrestore(&pgd_lock, flags);
return 0;
@@ -578,11 +537,16 @@ out_unlock:
static int __change_page_attr(struct cpa_data *cpa, int primary)
{
- unsigned long address = cpa->vaddr;
+ unsigned long address;
int do_split, err;
unsigned int level;
pte_t *kpte, old_pte;
+ if (cpa->flags & CPA_ARRAY)
+ address = cpa->vaddr[cpa->curpage];
+ else
+ address = *cpa->vaddr;
+
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -592,10 +556,9 @@ repeat:
if (!pte_val(old_pte)) {
if (!primary)
return 0;
- printk(KERN_WARNING "CPA: called for zero pte. "
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
"vaddr = %lx cpa->vaddr = %lx\n", address,
- cpa->vaddr);
- WARN_ON(1);
+ *cpa->vaddr);
return -EINVAL;
}
@@ -621,7 +584,7 @@ repeat:
*/
if (pte_val(old_pte) != pte_val(new_pte)) {
set_pte_atomic(kpte, new_pte);
- cpa->flushtlb = 1;
+ cpa->flags |= CPA_FLUSHTLB;
}
cpa->numpages = 1;
return 0;
@@ -645,7 +608,25 @@ repeat:
*/
err = split_large_page(kpte, address);
if (!err) {
- cpa->flushtlb = 1;
+ /*
+ * Do a global flush tlb after splitting the large page
+ * and before we do the actual change page attribute in the PTE.
+ *
+ * With out this, we violate the TLB application note, that says
+ * "The TLBs may contain both ordinary and large-page
+ * translations for a 4-KByte range of linear addresses. This
+ * may occur if software modifies the paging structures so that
+ * the page size used for the address range changes. If the two
+ * translations differ with respect to page frame or attributes
+ * (e.g., permissions), processor behavior is undefined and may
+ * be implementation-specific."
+ *
+ * We do this global tlb flush inside the cpa_lock, so that we
+ * don't allow any other cpu, with stale tlb entries change the
+ * page attribute in parallel, that also falls into the
+ * just split large page entry.
+ */
+ flush_tlb_all();
goto repeat;
}
@@ -658,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
{
struct cpa_data alias_cpa;
int ret = 0;
+ unsigned long temp_cpa_vaddr, vaddr;
if (cpa->pfn >= max_pfn_mapped)
return 0;
@@ -670,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the direct
* mapping already:
*/
- if (!(within(cpa->vaddr, PAGE_OFFSET,
+ if (cpa->flags & CPA_ARRAY)
+ vaddr = cpa->vaddr[cpa->curpage];
+ else
+ vaddr = *cpa->vaddr;
+
+ if (!(within(vaddr, PAGE_OFFSET,
PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
#ifdef CONFIG_X86_64
- || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+ || within(vaddr, PAGE_OFFSET + (1UL<<32),
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
#endif
)) {
alias_cpa = *cpa;
- alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+ temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+ alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.flags &= ~CPA_ARRAY;
+
ret = __change_page_attr_set_clr(&alias_cpa, 0);
}
@@ -691,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the high
* mapping already:
*/
- if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+ if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
return 0;
/*
@@ -702,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
return 0;
alias_cpa = *cpa;
- alias_cpa.vaddr =
- (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+ temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+ alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.flags &= ~CPA_ARRAY;
/*
* The high mapping range is imprecise, so ignore the return value.
@@ -723,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
* preservation check.
*/
cpa->numpages = numpages;
+ /* for array changes, we can't use large page */
+ if (cpa->flags & CPA_ARRAY)
+ cpa->numpages = 1;
+ if (!debug_pagealloc)
+ spin_lock(&cpa_lock);
ret = __change_page_attr(cpa, checkalias);
+ if (!debug_pagealloc)
+ spin_unlock(&cpa_lock);
if (ret)
return ret;
@@ -741,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
*/
BUG_ON(cpa->numpages > numpages);
numpages -= cpa->numpages;
- cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ if (cpa->flags & CPA_ARRAY)
+ cpa->curpage++;
+ else
+ *cpa->vaddr += cpa->numpages * PAGE_SIZE;
+
}
return 0;
}
@@ -752,9 +754,9 @@ static inline int cache_attr(pgprot_t attr)
(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
}
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
- int force_split)
+ int force_split, int array)
{
struct cpa_data cpa;
int ret, cache, checkalias;
@@ -769,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
return 0;
/* Ensure we are PAGE_SIZE aligned */
- if (addr & ~PAGE_MASK) {
- addr &= PAGE_MASK;
- /*
- * People should not be passing in unaligned addresses:
- */
- WARN_ON_ONCE(1);
+ if (!array) {
+ if (*addr & ~PAGE_MASK) {
+ *addr &= PAGE_MASK;
+ /*
+ * People should not be passing in unaligned addresses:
+ */
+ WARN_ON_ONCE(1);
+ }
+ } else {
+ int i;
+ for (i = 0; i < numpages; i++) {
+ if (addr[i] & ~PAGE_MASK) {
+ addr[i] &= PAGE_MASK;
+ WARN_ON_ONCE(1);
+ }
+ }
}
+ /* Must avoid aliasing mappings in the highmem code */
+ kmap_flush_unused();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
cpa.mask_clr = mask_clr;
- cpa.flushtlb = 0;
+ cpa.flags = 0;
+ cpa.curpage = 0;
cpa.force_split = force_split;
+ if (array)
+ cpa.flags |= CPA_ARRAY;
+
/* No alias checking for _NX bit modifications */
checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -792,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
/*
* Check whether we really changed something:
*/
- if (!cpa.flushtlb)
+ if (!(cpa.flags & CPA_FLUSHTLB))
goto out;
/*
@@ -807,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* error case we fall back to cpa_flush_all (which uses
* wbindv):
*/
- if (!ret && cpu_has_clflush)
- cpa_flush_range(addr, numpages, cache);
- else
+ if (!ret && cpu_has_clflush) {
+ if (cpa.flags & CPA_ARRAY)
+ cpa_flush_array(addr, numpages, cache);
+ else
+ cpa_flush_range(*addr, numpages, cache);
+ } else
cpa_flush_all(cache);
out:
- cpa_fill_pool(NULL);
-
return ret;
}
-static inline int change_page_attr_set(unsigned long addr, int numpages,
- pgprot_t mask)
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+ pgprot_t mask, int array)
{
- return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+ array);
}
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
- pgprot_t mask)
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+ pgprot_t mask, int array)
{
- return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+ array);
}
int _set_memory_uc(unsigned long addr, int numpages)
@@ -835,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
- return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_CACHE_UC_MINUS));
+ return change_page_attr_set(&addr, numpages,
+ __pgprot(_PAGE_CACHE_UC_MINUS), 0);
}
int set_memory_uc(unsigned long addr, int numpages)
@@ -844,7 +866,7 @@ int set_memory_uc(unsigned long addr, int numpages)
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_UC_MINUS, NULL))
return -EINVAL;
@@ -852,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_uc);
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+ unsigned long start;
+ unsigned long end;
+ int i;
+ /*
+ * for now UC MINUS. see comments in ioremap_nocache()
+ */
+ for (i = 0; i < addrinarray; i++) {
+ start = __pa(addr[i]);
+ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ if (end != __pa(addr[i + 1]))
+ break;
+ i++;
+ }
+ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+ goto out;
+ }
+
+ return change_page_attr_set(addr, addrinarray,
+ __pgprot(_PAGE_CACHE_UC_MINUS), 1);
+out:
+ for (i = 0; i < addrinarray; i++) {
+ unsigned long tmp = __pa(addr[i]);
+
+ if (tmp == start)
+ break;
+ for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ if (end != __pa(addr[i + 1]))
+ break;
+ i++;
+ }
+ free_memtype(tmp, end);
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
int _set_memory_wc(unsigned long addr, int numpages)
{
- return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_CACHE_WC));
+ return change_page_attr_set(&addr, numpages,
+ __pgprot(_PAGE_CACHE_WC), 0);
}
int set_memory_wc(unsigned long addr, int numpages)
@@ -863,7 +923,7 @@ int set_memory_wc(unsigned long addr, int numpages)
if (!pat_enabled)
return set_memory_uc(addr, numpages);
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
_PAGE_CACHE_WC, NULL))
return -EINVAL;
@@ -873,49 +933,71 @@ EXPORT_SYMBOL(set_memory_wc);
int _set_memory_wb(unsigned long addr, int numpages)
{
- return change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_CACHE_MASK));
+ return change_page_attr_clear(&addr, numpages,
+ __pgprot(_PAGE_CACHE_MASK), 0);
}
int set_memory_wb(unsigned long addr, int numpages)
{
- free_memtype(addr, addr + numpages * PAGE_SIZE);
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
return _set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_memory_wb);
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+ int i;
+
+ for (i = 0; i < addrinarray; i++) {
+ unsigned long start = __pa(addr[i]);
+ unsigned long end;
+
+ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+ if (end != __pa(addr[i + 1]))
+ break;
+ i++;
+ }
+ free_memtype(start, end);
+ }
+ return change_page_attr_clear(addr, addrinarray,
+ __pgprot(_PAGE_CACHE_MASK), 1);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
int set_memory_x(unsigned long addr, int numpages)
{
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages)
{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);
int set_memory_ro(unsigned long addr, int numpages)
{
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
+EXPORT_SYMBOL_GPL(set_memory_ro);
int set_memory_rw(unsigned long addr, int numpages)
{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
+EXPORT_SYMBOL_GPL(set_memory_rw);
int set_memory_np(unsigned long addr, int numpages)
{
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
int set_memory_4k(unsigned long addr, int numpages)
{
- return change_page_attr_set_clr(addr, numpages, __pgprot(0),
- __pgprot(0), 1);
+ return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+ __pgprot(0), 1, 0);
}
int set_pages_uc(struct page *page, int numpages)
@@ -968,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages)
static int __set_pages_p(struct page *page, int numpages)
{
- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ unsigned long tempaddr = (unsigned long) page_address(page);
+ struct cpa_data cpa = { .vaddr = &tempaddr,
.numpages = numpages,
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
- .mask_clr = __pgprot(0)};
+ .mask_clr = __pgprot(0),
+ .flags = 0};
- return __change_page_attr_set_clr(&cpa, 1);
+ /*
+ * No alias checking needed for setting present flag. otherwise,
+ * we may need to break large pages for 64-bit kernel text
+ * mappings (this adds to complexity if we want to do this from
+ * atomic context especially). Let's keep it simple!
+ */
+ return __change_page_attr_set_clr(&cpa, 0);
}
static int __set_pages_np(struct page *page, int numpages)
{
- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ unsigned long tempaddr = (unsigned long) page_address(page);
+ struct cpa_data cpa = { .vaddr = &tempaddr,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .flags = 0};
- return __change_page_attr_set_clr(&cpa, 1);
+ /*
+ * No alias checking needed for setting not present flag. otherwise,
+ * we may need to break large pages for 64-bit kernel text
+ * mappings (this adds to complexity if we want to do this from
+ * atomic context especially). Let's keep it simple!
+ */
+ return __change_page_attr_set_clr(&cpa, 0);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -1003,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
/*
* The return value is ignored as the calls cannot fail.
- * Large pages are kept enabled at boot time, and are
- * split up quickly with DEBUG_PAGEALLOC. If a splitup
- * fails here (due to temporary memory shortage) no damage
- * is done because we just keep the largepage intact up
- * to the next attempt when it will likely be split up:
+ * Large pages for identity mappings are not used at boot time
+ * and hence no memory allocations during large page split.
*/
if (enable)
__set_pages_p(page, numpages);
@@ -1019,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
-
- /*
- * Try to refill the page pool here. We can do this only after
- * the tlb flush.
- */
- cpa_fill_pool(NULL);
}
-#ifdef CONFIG_DEBUG_FS
-static int dpa_show(struct seq_file *m, void *v)
-{
- seq_puts(m, "DEBUG_PAGEALLOC\n");
- seq_printf(m, "pool_size : %lu\n", pool_size);
- seq_printf(m, "pool_pages : %lu\n", pool_pages);
- seq_printf(m, "pool_low : %lu\n", pool_low);
- seq_printf(m, "pool_used : %lu\n", pool_used);
- seq_printf(m, "pool_failed : %lu\n", pool_failed);
-
- return 0;
-}
-
-static int dpa_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, dpa_show, NULL);
-}
-
-static const struct file_operations dpa_fops = {
- .open = dpa_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int __init debug_pagealloc_proc_init(void)
-{
- struct dentry *de;
-
- de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
- &dpa_fops);
- if (!de)
- return -ENOMEM;
-
- return 0;
-}
-__initcall(debug_pagealloc_proc_init);
-#endif
-
#ifdef CONFIG_HIBERNATION
bool kernel_page_present(struct page *page)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2fe30916d4b..738fd0f2495 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -7,24 +7,24 @@
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
*/
-#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
+#include <linux/mm.h>
#include <linux/fs.h>
-#include <linux/bootmem.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <asm/msr.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
#include <asm/processor.h>
-#include <asm/page.h>
+#include <asm/tlbflush.h>
#include <asm/pgtable.h>
-#include <asm/pat.h>
-#include <asm/e820.h>
-#include <asm/cacheflush.h>
#include <asm/fcntl.h>
+#include <asm/e820.h>
#include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/pat.h>
#include <asm/io.h>
#ifdef CONFIG_X86_PAT
@@ -46,6 +46,7 @@ early_param("nopat", nopat);
static int debug_enable;
+
static int __init pat_debug_setup(char *str)
{
debug_enable = 1;
@@ -145,14 +146,14 @@ static char *cattr_name(unsigned long flags)
*/
struct memtype {
- u64 start;
- u64 end;
- unsigned long type;
- struct list_head nd;
+ u64 start;
+ u64 end;
+ unsigned long type;
+ struct list_head nd;
};
static LIST_HEAD(memtype_list);
-static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
+static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
/*
* Does intersection of PAT memory type and MTRR memory type and returns
@@ -180,8 +181,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
return req_type;
}
-static int chk_conflict(struct memtype *new, struct memtype *entry,
- unsigned long *type)
+static int
+chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
{
if (new->type != entry->type) {
if (type) {
@@ -207,6 +208,69 @@ static int chk_conflict(struct memtype *new, struct memtype *entry,
return -EBUSY;
}
+static struct memtype *cached_entry;
+static u64 cached_start;
+
+/*
+ * For RAM pages, mark the pages as non WB memory type using
+ * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
+ * set_memory_wc() on a RAM page at a time before marking it as WB again.
+ * This is ok, because only one driver will be owning the page and
+ * doing set_memory_*() calls.
+ *
+ * For now, we use PageNonWB to track that the RAM page is being mapped
+ * as non WB. In future, we will have to use one more flag
+ * (or some other mechanism in page_struct) to distinguish between
+ * UC and WC mapping.
+ */
+static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
+ unsigned long *new_type)
+{
+ struct page *page;
+ u64 pfn, end_pfn;
+
+ for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+ page = pfn_to_page(pfn);
+ if (page_mapped(page) || PageNonWB(page))
+ goto out;
+
+ SetPageNonWB(page);
+ }
+ return 0;
+
+out:
+ end_pfn = pfn;
+ for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+ page = pfn_to_page(pfn);
+ ClearPageNonWB(page);
+ }
+
+ return -EINVAL;
+}
+
+static int free_ram_pages_type(u64 start, u64 end)
+{
+ struct page *page;
+ u64 pfn, end_pfn;
+
+ for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+ page = pfn_to_page(pfn);
+ if (page_mapped(page) || !PageNonWB(page))
+ goto out;
+
+ ClearPageNonWB(page);
+ }
+ return 0;
+
+out:
+ end_pfn = pfn;
+ for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+ page = pfn_to_page(pfn);
+ SetPageNonWB(page);
+ }
+ return -EINVAL;
+}
+
/*
* req_type typically has one of the:
* - _PAGE_CACHE_WB
@@ -223,14 +287,15 @@ static int chk_conflict(struct memtype *new, struct memtype *entry,
* it will return a negative return value.
*/
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
- unsigned long *new_type)
+ unsigned long *new_type)
{
struct memtype *new, *entry;
unsigned long actual_type;
struct list_head *where;
+ int is_range_ram;
int err = 0;
- BUG_ON(start >= end); /* end is exclusive */
+ BUG_ON(start >= end); /* end is exclusive */
if (!pat_enabled) {
/* This is identical to page table setting without PAT */
@@ -263,28 +328,41 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
actual_type = _PAGE_CACHE_WB;
else
actual_type = _PAGE_CACHE_UC_MINUS;
- } else
+ } else {
actual_type = pat_x_mtrr_type(start, end,
req_type & _PAGE_CACHE_MASK);
+ }
+
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return reserve_ram_pages_type(start, end, req_type, new_type);
+ else if (is_range_ram < 0)
+ return -EINVAL;
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
return -ENOMEM;
- new->start = start;
- new->end = end;
- new->type = actual_type;
+ new->start = start;
+ new->end = end;
+ new->type = actual_type;
if (new_type)
*new_type = actual_type;
spin_lock(&memtype_lock);
+ if (cached_entry && start >= cached_start)
+ entry = cached_entry;
+ else
+ entry = list_entry(&memtype_list, struct memtype, nd);
+
/* Search for existing mapping that overlaps the current range */
where = NULL;
- list_for_each_entry(entry, &memtype_list, nd) {
+ list_for_each_entry_continue(entry, &memtype_list, nd) {
if (end <= entry->start) {
where = entry->nd.prev;
+ cached_entry = list_entry(where, struct memtype, nd);
break;
} else if (start <= entry->start) { /* end > entry->start */
err = chk_conflict(new, entry, new_type);
@@ -292,6 +370,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
where = entry->nd.prev;
+ cached_entry = list_entry(where,
+ struct memtype, nd);
}
break;
} else if (start < entry->end) { /* start > entry->start */
@@ -299,7 +379,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (!err) {
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
- where = &entry->nd;
+ cached_entry = list_entry(entry->nd.prev,
+ struct memtype, nd);
+
+ /*
+ * Move to right position in the linked
+ * list to add this new entry
+ */
+ list_for_each_entry_continue(entry,
+ &memtype_list, nd) {
+ if (start <= entry->start) {
+ where = entry->nd.prev;
+ break;
+ }
+ }
}
break;
}
@@ -311,9 +404,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
start, end, cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
+
return err;
}
+ cached_start = start;
+
if (where)
list_add(&new->nd, where);
else
@@ -332,6 +428,7 @@ int free_memtype(u64 start, u64 end)
{
struct memtype *entry;
int err = -EINVAL;
+ int is_range_ram;
if (!pat_enabled)
return 0;
@@ -340,9 +437,18 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ else if (is_range_ram < 0)
+ return -EINVAL;
+
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
if (entry->start == start && entry->end == end) {
+ if (cached_entry == entry || cached_start == start)
+ cached_entry = NULL;
+
list_del(&entry->nd);
kfree(entry);
err = 0;
@@ -357,18 +463,11 @@ int free_memtype(u64 start, u64 end)
}
dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
+
return err;
}
-/*
- * /dev/mem mmap interface. The memtype used for mapping varies:
- * - Use UC for mappings with O_SYNC flag
- * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
- * inherit the memtype from existing mapping.
- * - Else use UC_MINUS memtype (for backward compatibility with existing
- * X drivers.
- */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -406,14 +505,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
u64 offset = ((u64) pfn) << PAGE_SHIFT;
- unsigned long flags = _PAGE_CACHE_UC_MINUS;
+ unsigned long flags = -1;
int retval;
if (!range_is_allowed(pfn, size))
return 0;
if (file->f_flags & O_SYNC) {
- flags = _PAGE_CACHE_UC;
+ flags = _PAGE_CACHE_UC_MINUS;
}
#ifdef CONFIG_X86_32
@@ -436,13 +535,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
#endif
/*
- * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+ * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
+ *
* Without O_SYNC, we want to get
* - WB for WB-able memory and no other conflicting mappings
* - UC_MINUS for non-WB-able memory with no other conflicting mappings
* - Inherit from confliting mappings otherwise
*/
- if (flags != _PAGE_CACHE_UC_MINUS) {
+ if (flags != -1) {
retval = reserve_memtype(offset, offset + size, flags, NULL);
} else {
retval = reserve_memtype(offset, offset + size, -1, &flags);
@@ -470,9 +570,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
{
+ unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
u64 addr = (u64)pfn << PAGE_SHIFT;
unsigned long flags;
- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
reserve_memtype(addr, addr + size, want_flags, &flags);
if (flags != want_flags) {
@@ -492,7 +592,7 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
free_memtype(addr, addr + size);
}
-#if defined(CONFIG_DEBUG_FS)
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
/* get Nth element of the linked list */
static struct memtype *memtype_get_idx(loff_t pos)
@@ -515,6 +615,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
}
spin_unlock(&memtype_lock);
kfree(print_entry);
+
return NULL;
}
@@ -545,6 +646,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
print_entry->start, print_entry->end);
kfree(print_entry);
+
return 0;
}
@@ -576,4 +678,4 @@ static int __init pat_memtype_list_init(void)
late_initcall(pat_memtype_list_init);
-#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d50302774fe..86f2ffc43c3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -63,10 +63,8 @@ static inline void pgd_list_del(pgd_t *pgd)
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-static void pgd_ctor(void *p)
+static void pgd_ctor(pgd_t *pgd)
{
- pgd_t *pgd = p;
-
/* If the pgd points to a shared pagetable level (either the
ptes in non-PAE, or shared PMD in PAE), then just copy the
references from swapper_pg_dir. */
@@ -87,7 +85,7 @@ static void pgd_ctor(void *p)
pgd_list_add(pgd);
}
-static void pgd_dtor(void *pgd)
+static void pgd_dtor(pgd_t *pgd)
{
unsigned long flags; /* can be called from interrupt context */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cab0abbd1eb..0951db9ee51 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -123,7 +123,8 @@ static int __init parse_vmalloc(char *arg)
if (!arg)
return -EINVAL;
- __VMALLOC_RESERVE = memparse(arg, &arg);
+ /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/
+ __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET;
return 0;
}
early_param("vmalloc", parse_vmalloc);
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 1eb2973a301..16ae70fc57e 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void)
* start of the node, and that the current "end" address is after
* the previous one.
*/
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
{
/*
* Only add present memory as told by the e820.
@@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
if (memory_chunk->start_pfn >= max_pfn) {
printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
memory_chunk->start_pfn, memory_chunk->end_pfn);
- return;
+ return -1;
}
if (memory_chunk->nid != nid)
- return;
+ return -1;
if (!node_has_online_mem(nid))
node_start_pfn[nid] = memory_chunk->start_pfn;
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
if (node_end_pfn[nid] < memory_chunk->end_pfn)
node_end_pfn[nid] = memory_chunk->end_pfn;
+
+ return 0;
}
int __init get_memcfg_from_srat(void)
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG
"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
- node_read_chunk(chunk->nid, chunk);
+ if (node_read_chunk(chunk->nid, chunk))
+ continue;
+
e820_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3f90289410e..8a5f1614a3d 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
+#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
@@ -28,23 +29,48 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
static int nmi_start(void);
static void nmi_stop(void);
+static void nmi_cpu_start(void *dummy);
+static void nmi_cpu_stop(void *dummy);
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;
+#ifdef CONFIG_SMP
+static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
+ void *data)
+{
+ int cpu = (unsigned long)data;
+ switch (action) {
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
+ break;
+ case CPU_DOWN_PREPARE:
+ smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block oprofile_cpu_nb = {
+ .notifier_call = oprofile_cpu_notifier
+};
+#endif
+
#ifdef CONFIG_PM
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
+ /* Only one CPU left, just stop that one */
if (nmi_enabled == 1)
- nmi_stop();
+ nmi_cpu_stop(NULL);
return 0;
}
static int nmi_resume(struct sys_device *dev)
{
if (nmi_enabled == 1)
- nmi_start();
+ nmi_cpu_start(NULL);
return 0;
}
@@ -269,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy)
static void nmi_shutdown(void)
{
- struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
+ struct op_msrs *msrs;
+
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
unregister_die_notifier(&profile_exceptions_nb);
+ msrs = &get_cpu_var(cpu_msrs);
model->shutdown(msrs);
free_msrs();
put_cpu_var(cpu_msrs);
@@ -463,6 +491,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
}
init_sysfs();
+#ifdef CONFIG_SMP
+ register_cpu_notifier(&oprofile_cpu_nb);
+#endif
using_nmi = 1;
ops->create_files = nmi_create_files;
ops->setup = nmi_setup;
@@ -476,6 +507,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
void op_nmi_exit(void)
{
- if (using_nmi)
+ if (using_nmi) {
exit_sysfs();
+#ifdef CONFIG_SMP
+ unregister_cpu_notifier(&oprofile_cpu_nb);
+#endif
+ }
}
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 56b4757a1f4..43ac5af338d 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -10,11 +10,12 @@
#include <linux/oprofile.h>
#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/nmi.h>
#include <asm/msr.h>
-#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
-#include <asm/nmi.h>
+
#include "op_x86_model.h"
#include "op_counter.h"
@@ -40,7 +41,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT;
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2){
+ if (smp_num_siblings == 2) {
num_counters = NUM_COUNTERS_HT2;
num_controls = NUM_CONTROLS_HT2;
}
@@ -86,7 +87,7 @@ struct p4_event_binding {
#define CTR_FLAME_2 (1 << 6)
#define CTR_IQ_5 (1 << 7)
-static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
{ CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
{ CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
{ CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
};
-#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
/* p4 event codes in libop/op_event.h are indices into this table. */
static struct p4_event_binding p4_events[NUM_EVENTS] = {
-
+
{ /* BRANCH_RETIRED */
- 0x05, 0x06,
+ 0x05, 0x06,
{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
{CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
-
+
{ /* MISPRED_BRANCH_RETIRED */
- 0x04, 0x03,
+ 0x04, 0x03,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
-
+
{ /* TC_DELIVER_MODE */
0x01, 0x01,
- { { CTR_MS_0, MSR_P4_TC_ESCR0},
+ { { CTR_MS_0, MSR_P4_TC_ESCR0},
{ CTR_MS_2, MSR_P4_TC_ESCR1} }
},
-
+
{ /* BPU_FETCH_REQUEST */
- 0x00, 0x03,
+ 0x00, 0x03,
{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
{ CTR_BPU_2, MSR_P4_BPU_ESCR1} }
},
@@ -146,7 +147,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
},
{ /* LOAD_PORT_REPLAY */
- 0x02, 0x04,
+ 0x02, 0x04,
{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
{ CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
},
@@ -170,43 +171,43 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
},
{ /* BSQ_CACHE_REFERENCE */
- 0x07, 0x0c,
+ 0x07, 0x0c,
{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
{ CTR_BPU_2, MSR_P4_BSU_ESCR1} }
},
{ /* IOQ_ALLOCATION */
- 0x06, 0x03,
+ 0x06, 0x03,
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ 0, 0 } }
},
{ /* IOQ_ACTIVE_ENTRIES */
- 0x06, 0x1a,
+ 0x06, 0x1a,
{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
{ 0, 0 } }
},
{ /* FSB_DATA_ACTIVITY */
- 0x06, 0x17,
+ 0x06, 0x17,
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ CTR_BPU_2, MSR_P4_FSB_ESCR1} }
},
{ /* BSQ_ALLOCATION */
- 0x07, 0x05,
+ 0x07, 0x05,
{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
{ 0, 0 } }
},
{ /* BSQ_ACTIVE_ENTRIES */
0x07, 0x06,
- { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
+ { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
{ 0, 0 } }
},
{ /* X87_ASSIST */
- 0x05, 0x03,
+ 0x05, 0x03,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
@@ -216,21 +217,21 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
-
+
{ /* PACKED_SP_UOP */
- 0x01, 0x08,
+ 0x01, 0x08,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
-
+
{ /* PACKED_DP_UOP */
- 0x01, 0x0c,
+ 0x01, 0x0c,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* SCALAR_SP_UOP */
- 0x01, 0x0a,
+ 0x01, 0x0a,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
@@ -242,31 +243,31 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
},
{ /* 64BIT_MMX_UOP */
- 0x01, 0x02,
+ 0x01, 0x02,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
-
+
{ /* 128BIT_MMX_UOP */
- 0x01, 0x1a,
+ 0x01, 0x1a,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
{ /* X87_FP_UOP */
- 0x01, 0x04,
+ 0x01, 0x04,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
-
+
{ /* X87_SIMD_MOVES_UOP */
- 0x01, 0x2e,
+ 0x01, 0x2e,
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
{ CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
},
-
+
{ /* MACHINE_CLEAR */
- 0x05, 0x02,
+ 0x05, 0x02,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
{ CTR_IQ_5, MSR_P4_CRU_ESCR3} }
},
@@ -276,9 +277,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
{ CTR_BPU_2, MSR_P4_FSB_ESCR1} }
},
-
+
{ /* TC_MS_XFER */
- 0x00, 0x05,
+ 0x00, 0x05,
{ { CTR_MS_0, MSR_P4_MS_ESCR0},
{ CTR_MS_2, MSR_P4_MS_ESCR1} }
},
@@ -308,7 +309,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
},
{ /* INSTR_RETIRED */
- 0x04, 0x02,
+ 0x04, 0x02,
{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
@@ -319,14 +320,14 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
{ CTR_IQ_5, MSR_P4_CRU_ESCR1} }
},
- { /* UOP_TYPE */
- 0x02, 0x02,
+ { /* UOP_TYPE */
+ 0x02, 0x02,
{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
{ CTR_IQ_5, MSR_P4_RAT_ESCR1} }
},
{ /* RETIRED_MISPRED_BRANCH_TYPE */
- 0x02, 0x05,
+ 0x02, 0x05,
{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
{ CTR_MS_2, MSR_P4_TBPU_ESCR1} }
},
@@ -349,8 +350,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
+#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
-#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
+#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -380,7 +381,7 @@ static unsigned int get_stagger(void)
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
-#endif
+#endif
return 0;
}
@@ -395,25 +396,23 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
- unsigned int i;
+ unsigned int i;
unsigned int addr, cccraddr, stag;
setup_num_counters();
stag = get_stagger();
/* initialize some registers */
- for (i = 0; i < num_counters; ++i) {
+ for (i = 0; i < num_counters; ++i)
msrs->counters[i].addr = 0;
- }
- for (i = 0; i < num_controls; ++i) {
+ for (i = 0; i < num_controls; ++i)
msrs->controls[i].addr = 0;
- }
-
+
/* the counter & cccr registers we pay attention to */
for (i = 0; i < num_counters; ++i) {
addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
- if (reserve_perfctr_nmi(addr)){
+ if (reserve_perfctr_nmi(addr)) {
msrs->counters[i].addr = addr;
msrs->controls[i].addr = cccraddr;
}
@@ -447,22 +446,22 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
-
+
for (addr = MSR_P4_MS_ESCR0 + stag;
- addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
+ addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
-
+
for (addr = MSR_P4_IX_ESCR0 + stag;
- addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
+ addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
if (reserve_evntsel_nmi(addr))
msrs->controls[i].addr = addr;
}
/* there are 2 remaining non-contiguously located ESCRs */
- if (num_counters == NUM_COUNTERS_NON_HT) {
+ if (num_counters == NUM_COUNTERS_NON_HT) {
/* standard non-HT CPUs handle both remaining ESCRs*/
if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
unsigned int stag;
stag = get_stagger();
-
+
/* convert from counter *number* to counter *bit* */
counter_bit = 1 << VIRT_CTR(stag, ctr);
-
+
/* find our event binding structure. */
if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
- printk(KERN_ERR
- "oprofile: P4 event code 0x%lx out of range\n",
+ printk(KERN_ERR
+ "oprofile: P4 event code 0x%lx out of range\n",
counter_config[ctr].event);
return;
}
-
+
ev = &(p4_events[counter_config[ctr].event - 1]);
-
+
for (i = 0; i < maxbind; i++) {
if (ev->bindings[i].virt_counter & counter_bit) {
@@ -526,25 +525,24 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
}
ESCR_SET_EVENT_SELECT(escr, ev->event_select);
- ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+ ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
ESCR_WRITE(escr, high, ev, i);
-
+
/* modify CCCR */
CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
CCCR_CLEAR(cccr);
CCCR_SET_REQUIRED_BITS(cccr);
CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
- if (stag == 0) {
+ if (stag == 0)
CCCR_SET_PMI_OVF_0(cccr);
- } else {
+ else
CCCR_SET_PMI_OVF_1(cccr);
- }
CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
return;
}
}
- printk(KERN_ERR
+ printk(KERN_ERR
"oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
counter_config[ctr].event, stag, ctr);
}
@@ -559,14 +557,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
stag = get_stagger();
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
- if (! MISC_PMC_ENABLED_P(low)) {
+ if (!MISC_PMC_ENABLED_P(low)) {
printk(KERN_ERR "oprofile: P4 PMC not available\n");
return;
}
/* clear the cccrs we will use */
for (i = 0 ; i < num_counters ; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_CLEAR(low);
@@ -576,14 +574,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
/* clear all escrs (including those outside our concern) */
for (i = num_counters; i < num_controls; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
wrmsr(msrs->controls[i].addr, 0, 0);
}
/* setup all counters */
for (i = 0 ; i < num_counters ; ++i) {
- if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
+ if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@ static int p4_check_ctrs(struct pt_regs * const regs,
stag = get_stagger();
for (i = 0; i < num_counters; ++i) {
-
- if (!reset_value[i])
+
+ if (!reset_value[i])
continue;
- /*
+ /*
* there is some eccentricity in the hardware which
* requires that we perform 2 extra corrections:
*
@@ -616,24 +614,24 @@ static int p4_check_ctrs(struct pt_regs * const regs,
*
* - write the counter back twice to ensure it gets
* updated properly.
- *
+ *
* the former seems to be related to extra NMIs happening
* during the current NMI; the latter is reported as errata
* N15 in intel doc 249199-029, pentium 4 specification
* update, though their suggested work-around does not
* appear to solve the problem.
*/
-
+
real = VIRT_CTR(stag, i);
CCCR_READ(low, high, real);
- CTR_READ(ctr, high, real);
+ CTR_READ(ctr, high, real);
if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], real);
+ CTR_WRITE(reset_value[i], real);
CCCR_CLEAR_OVF(low);
CCCR_WRITE(low, high, real);
- CTR_WRITE(reset_value[i], real);
+ CTR_WRITE(reset_value[i], real);
}
}
@@ -683,15 +681,16 @@ static void p4_shutdown(struct op_msrs const * const msrs)
int i;
for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
+ if (CTR_IS_RESERVED(msrs, i))
release_perfctr_nmi(msrs->counters[i].addr);
}
- /* some of the control registers are specially reserved in
+ /*
+ * some of the control registers are specially reserved in
* conjunction with the counter registers (hence the starting offset).
* This saves a few bits.
*/
for (i = num_counters ; i < num_controls ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
+ if (CTRL_IS_RESERVED(msrs, i))
release_evntsel_nmi(msrs->controls[i].addr);
}
}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index dbf53236971..22e057665e5 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,6 +1,7 @@
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/topology.h>
+#include <linux/cpu.h>
#include "pci.h"
#ifdef CONFIG_X86_64
@@ -555,15 +556,17 @@ static int __init early_fill_mp_bus_info(void)
return 0;
}
-postcore_initcall(early_fill_mp_bus_info);
+#else /* !CONFIG_X86_64 */
-#endif
+static int __init early_fill_mp_bus_info(void) { return 0; }
+
+#endif /* !CONFIG_X86_64 */
/* common 32/64 bit code */
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
-static void enable_pci_io_ecs_per_cpu(void *unused)
+static void enable_pci_io_ecs(void *unused)
{
u64 reg;
rdmsrl(MSR_AMD64_NB_CFG, reg);
@@ -573,14 +576,51 @@ static void enable_pci_io_ecs_per_cpu(void *unused)
}
}
-static int __init enable_pci_io_ecs(void)
+static int __cpuinit amd_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
{
+ int cpu = (long)hcpu;
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata amd_cpu_notifier = {
+ .notifier_call = amd_cpu_notify,
+};
+
+static int __init pci_io_ecs_init(void)
+{
+ int cpu;
+
/* assume all cpus from fam10h have IO ECS */
if (boot_cpu_data.x86 < 0x10)
return 0;
- on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
+
+ register_cpu_notifier(&amd_cpu_notifier);
+ for_each_online_cpu(cpu)
+ amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
+ (void *)(long)cpu);
pci_probe |= PCI_HAS_IO_ECS;
+
+ return 0;
+}
+
+static int __init amd_postcore_init(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return 0;
+
+ early_fill_mp_bus_info();
+ pci_io_ecs_init();
+
return 0;
}
-postcore_initcall(enable_pci_io_ecs);
+postcore_initcall(amd_postcore_init);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5807d1bc73f..8791fc55e71 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -128,8 +128,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
pr = pci_find_parent_resource(dev, r);
if (!r->start || !pr ||
request_resource(pr, r) < 0) {
- dev_err(&dev->dev, "BAR %d: can't "
- "allocate resource\n", idx);
+ dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/*
* Something is wrong with the region.
* Invalidate the resource to prevent
@@ -164,15 +163,13 @@ static void __init pcibios_allocate_resources(int pass)
else
disabled = !(command & PCI_COMMAND_MEMORY);
if (pass == disabled) {
- dev_dbg(&dev->dev, "resource %#08llx-%#08llx "
- "(f=%lx, d=%d, p=%d)\n",
+ dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n",
(unsigned long long) r->start,
(unsigned long long) r->end,
r->flags, disabled, pass);
pr = pci_find_parent_resource(dev, r);
if (!pr || request_resource(pr, r) < 0) {
- dev_err(&dev->dev, "BAR %d: can't "
- "allocate resource\n", idx);
+ dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/* We'll assign a new address later */
r->end -= r->start;
r->start = 0;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fec0123b33a..006599db0dc 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
case PCI_DEVICE_ID_INTEL_ICH10_1:
case PCI_DEVICE_ID_INTEL_ICH10_2:
case PCI_DEVICE_ID_INTEL_ICH10_3:
+ case PCI_DEVICE_ID_INTEL_PCH_0:
+ case PCI_DEVICE_ID_INTEL_PCH_1:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
@@ -1041,35 +1043,44 @@ static void __init pcibios_fixup_irqs(void)
if (io_apic_assign_pci_irqs) {
int irq;
- if (pin) {
- /*
- * interrupt pins are numbered starting
- * from 1
- */
- pin--;
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
- PCI_SLOT(dev->devfn), pin);
- /*
- * Busses behind bridges are typically not listed in the MP-table.
- * In this case we have to look up the IRQ based on the parent bus,
- * parent slot, and pin number. The SMP code detects such bridged
- * busses itself so we should get into this branch reliably.
- */
- if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
- struct pci_dev *bridge = dev->bus->self;
-
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin);
- if (irq >= 0)
- dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
- pci_name(bridge),
- 'A' + pin, irq);
- }
- if (irq >= 0) {
- dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
- dev->irq = irq;
- }
+ if (!pin)
+ continue;
+
+ /*
+ * interrupt pins are numbered starting from 1
+ */
+ pin--;
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+ PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the
+ * MP-table. In this case we have to look up the IRQ
+ * based on the parent bus, parent slot, and pin number.
+ * The SMP code detects such bridged busses itself so we
+ * should get into this branch reliably.
+ */
+ if (irq < 0 && dev->bus->parent) {
+ /* go back to the bridge */
+ struct pci_dev *bridge = dev->bus->self;
+ int bus;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ bus = bridge->bus->number;
+ irq = IO_APIC_get_PCI_irq_vector(bus,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ dev_warn(&dev->dev,
+ "using bridge %s INT %c to "
+ "get IRQ %d\n",
+ pci_name(bridge),
+ 'A' + pin, irq);
+ }
+ if (irq >= 0) {
+ dev_info(&dev->dev,
+ "PCI->APIC IRQ transform: INT %c "
+ "-> IRQ %d\n",
+ 'A' + pin, irq);
+ dev->irq = irq;
}
}
#endif
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index ec9ce35e44d..b722dd481b3 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -14,7 +14,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
int n, devfn;
long node;
- if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+ if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff)
return;
DBG("PCI: Peer bridge fixup\n");
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 23faaa890ff..d9635764ce3 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -293,7 +293,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl,
return AE_OK;
}
-static int __init is_acpi_reserved(unsigned long start, unsigned long end)
+static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
{
struct resource mcfg_res;
@@ -310,6 +310,41 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end)
return mcfg_res.flags;
}
+typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
+
+static int __init is_mmconf_reserved(check_reserved_t is_reserved,
+ u64 addr, u64 size, int i,
+ typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
+{
+ u64 old_size = size;
+ int valid = 0;
+
+ while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
+ size >>= 1;
+ if (size < (16UL<<20))
+ break;
+ }
+
+ if (size >= (16UL<<20) || size == old_size) {
+ printk(KERN_NOTICE
+ "PCI: MCFG area at %Lx reserved in %s\n",
+ addr, with_e820?"E820":"ACPI motherboard resources");
+ valid = 1;
+
+ if (old_size != size) {
+ /* update end_bus_number */
+ cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1);
+ printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx "
+ "segment %hu buses %u - %u\n",
+ i, (unsigned long)cfg->address, cfg->pci_segment,
+ (unsigned int)cfg->start_bus_number,
+ (unsigned int)cfg->end_bus_number);
+ }
+ }
+
+ return valid;
+}
+
static void __init pci_mmcfg_reject_broken(int early)
{
typeof(pci_mmcfg_config[0]) *cfg;
@@ -324,21 +359,22 @@ static void __init pci_mmcfg_reject_broken(int early)
for (i = 0; i < pci_mmcfg_config_num; i++) {
int valid = 0;
- u32 size = (cfg->end_bus_number + 1) << 20;
+ u64 addr, size;
+
cfg = &pci_mmcfg_config[i];
+ addr = cfg->start_bus_number;
+ addr <<= 20;
+ addr += cfg->address;
+ size = cfg->end_bus_number + 1 - cfg->start_bus_number;
+ size <<= 20;
printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
"segment %hu buses %u - %u\n",
i, (unsigned long)cfg->address, cfg->pci_segment,
(unsigned int)cfg->start_bus_number,
(unsigned int)cfg->end_bus_number);
- if (!early &&
- is_acpi_reserved(cfg->address, cfg->address + size - 1)) {
- printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved "
- "in ACPI motherboard resources\n",
- cfg->address);
- valid = 1;
- }
+ if (!early)
+ valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
if (valid)
continue;
@@ -347,16 +383,11 @@ static void __init pci_mmcfg_reject_broken(int early)
printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
" reserved in ACPI motherboard resources\n",
cfg->address);
+
/* Don't try to do this check unless configuration
type 1 is available. how about type 2 ?*/
- if (raw_pci_ops && e820_all_mapped(cfg->address,
- cfg->address + size - 1,
- E820_RESERVED)) {
- printk(KERN_NOTICE
- "PCI: MCFG area at %Lx reserved in E820\n",
- cfg->address);
- valid = 1;
- }
+ if (raw_pci_ops)
+ valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1);
if (!valid)
goto reject;
@@ -365,7 +396,7 @@ static void __init pci_mmcfg_reject_broken(int early)
return;
reject:
- printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+ printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
pci_mmcfg_arch_free();
kfree(pci_mmcfg_config);
pci_mmcfg_config = NULL;
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 7dc5d5cf50a..d3e083dea72 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -45,7 +45,7 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
- ctxt->cr4 = read_cr4();
+ ctxt->cr4 = read_cr4_safe();
}
/* Needed by apm.c */
@@ -98,7 +98,9 @@ static void __restore_processor_state(struct saved_context *ctxt)
/*
* control registers
*/
- write_cr4(ctxt->cr4);
+ /* cr4 was introduced in the Pentium CPU */
+ if (ctxt->cr4)
+ write_cr4(ctxt->cr4);
write_cr3(ctxt->cr3);
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index b95aa6cfe3c..d1e9b53f9d3 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,5 +1,3 @@
-.text
-
/*
* This may not use any stack, nor any variable that is not "NoSave":
*
@@ -12,25 +10,26 @@
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
- .text
+.text
ENTRY(swsusp_arch_suspend)
-
movl %esp, saved_context_esp
movl %ebx, saved_context_ebx
movl %ebp, saved_context_ebp
movl %esi, saved_context_esi
movl %edi, saved_context_edi
- pushfl ; popl saved_context_eflags
+ pushfl
+ popl saved_context_eflags
call swsusp_save
ret
ENTRY(restore_image)
- movl resume_pg_dir, %ecx
- subl $__PAGE_OFFSET, %ecx
- movl %ecx, %cr3
+ movl resume_pg_dir, %eax
+ subl $__PAGE_OFFSET, %eax
+ movl %eax, %cr3
movl restore_pblist, %edx
.p2align 4,,7
@@ -52,17 +51,21 @@ copy_loop:
done:
/* go back to the original page tables */
- movl $swapper_pg_dir, %ecx
- subl $__PAGE_OFFSET, %ecx
- movl %ecx, %cr3
+ movl $swapper_pg_dir, %eax
+ subl $__PAGE_OFFSET, %eax
+ movl %eax, %cr3
/* Flush TLB, including "global" things (vmalloc) */
- movl mmu_cr4_features, %eax
- movl %eax, %edx
- andl $~(1<<7), %edx; # PGE
+ movl mmu_cr4_features, %ecx
+ jecxz 1f # cr4 Pentium and higher, skip if zero
+ movl %ecx, %edx
+ andl $~(X86_CR4_PGE), %edx
movl %edx, %cr4; # turn off PGE
- movl %cr3, %ecx; # flush TLB
- movl %ecx, %cr3
- movl %eax, %cr4; # turn PGE back on
+1:
+ movl %cr3, %eax; # flush TLB
+ movl %eax, %cr3
+ jecxz 1f # cr4 Pentium and higher, skip if zero
+ movl %ecx, %cr4; # turn PGE back on
+1:
movl saved_context_esp, %esp
movl saved_context_ebp, %ebp
@@ -70,7 +73,8 @@ done:
movl saved_context_esi, %esi
movl saved_context_edi, %edi
- pushl saved_context_eflags ; popfl
+ pushl saved_context_eflags
+ popfl
xorl %eax, %eax
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8d28925ebed..a27d562a974 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -844,7 +844,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
@@ -854,7 +854,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(u32 pfn)
+static void xen_release_pte_init(unsigned long pfn)
{
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@@ -870,7 +870,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
+static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
@@ -888,12 +888,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
}
}
-static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PTE);
}
-static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PMD);
}
@@ -941,7 +941,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
}
/* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(u32 pfn, unsigned level)
+static void xen_release_ptpage(unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
@@ -955,23 +955,23 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
}
}
-static void xen_release_pte(u32 pfn)
+static void xen_release_pte(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PTE);
}
-static void xen_release_pmd(u32 pfn)
+static void xen_release_pmd(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PMD);
}
#if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
}
-static void xen_release_pud(u32 pfn)
+static void xen_release_pud(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PUD);
}
@@ -1354,7 +1354,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
.pte_val = xen_pte_val,
- .pte_flags = native_pte_val,
+ .pte_flags = native_pte_flags,
.pgd_val = xen_pgd_val,
.make_pte = xen_make_pte,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b6acc3a0af4..d6790108388 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -42,7 +42,7 @@ char * __init xen_memory_setup(void)
e820.nr_map = 0;
- e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM);
+ e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
/*
* Even though this is normal, usable memory under Xen, reserve